Example #1
0
def scrape_date(str_date, cur_date, passive=False):
    #directory = str_date
    games_short_name = str_date + '.all.tar.bz2'
    saved_games_bundle = games_short_name
    return_code = ERROR

    if utils.at_least_as_big_as(saved_games_bundle, SMALL_FILE_SIZE):
        if DEBUG:
            print 'skipping because exists', str_date, saved_games_bundle, \
                'and not small (size=', os.stat(saved_games_bundle).st_size, ')'
        return_code = GOOD
    else:
        RemoveSmallFileIfExists(saved_games_bundle)

        if passive:
            return_code = MISSING
        elif not download_date(str_date, cur_date, saved_games_bundle):
            return_code = ERROR
        else:
            return_code = DOWNLOADED

    # Repackage an existing file, if found
    if utils.at_least_as_big_as(saved_games_bundle, SMALL_FILE_SIZE) and \
            not os.path.exists(repackage_filename(saved_games_bundle)):
        repackage_archive(saved_games_bundle)
        return_code = REPACKAGED

    return return_code
Example #2
0
def scrape_date(str_date, cur_date, passive=False):
    #directory = str_date
    games_short_name = str_date + '.all.tar.bz2'
    saved_games_bundle = games_short_name
    return_code = ERROR

    if utils.at_least_as_big_as(saved_games_bundle, SMALL_FILE_SIZE):
        if DEBUG:
            print 'skipping because exists', str_date, saved_games_bundle, \
                'and not small (size=', os.stat(saved_games_bundle).st_size, ')'
        return_code = GOOD
    else:
        RemoveSmallFileIfExists(saved_games_bundle)

        if passive:
            return_code = MISSING

        elif not download_date(str_date, cur_date, saved_games_bundle):
            return_code = ERROR

        return_code = DOWNLOADED

    # Repackage an existing file, if found
    if utils.at_least_as_big_as(saved_games_bundle, SMALL_FILE_SIZE) and \
            not os.path.exists(repackage_filename(saved_games_bundle)):
        repackage_archive(saved_games_bundle)
        return_code = REPACKAGED

    return return_code
Example #3
0
    # Fragment of a download driver: for each date in range, fetch that
    # day's games bundle into its own directory unless it is already there.
    # NOTE(review): the enclosing function header and the end of the loop
    # body are outside this excerpt.
    args = parser.parse_args()

    # All bundles are stored under static/scrape_data; paths below are
    # relative to it. (utils.ensure_exists presumably creates the directory
    # when missing -- confirm against utils.)
    utils.ensure_exists('static/scrape_data')
    os.chdir('static/scrape_data')

    # Dates come from utils.daterange, starting at 2010-10-15 and bounded
    # by today's date.
    for cur_date in utils.daterange(datetime.date(2010, 10, 15),
                                    datetime.date.today()):
        # Dates are keyed as YYYYMMDD strings.
        str_date = time.strftime("%Y%m%d", cur_date.timetuple())
        # Honour the date restriction given on the command line.
        if not utils.includes_day(args, str_date):
            print 'skipping', str_date, 'because not in cmd line arg daterange'
            continue
        # Each day gets its own directory named after the date, holding
        # <date>.all.tar.bz2.
        directory = str_date
        print str_date
        games_short_name = str_date + '.all.tar.bz2'
        saved_games_bundle = directory + '/' + games_short_name
        # A bundle of at least SMALL_FILE_SIZE is treated as already
        # downloaded; nothing more to do for this date.
        if utils.at_least_as_big_as(saved_games_bundle, SMALL_FILE_SIZE):
            print 'skipping because exists', str_date, saved_games_bundle, \
                'and not small (size=', os.stat(saved_games_bundle).st_size, ')'
            continue
        if not os.path.exists(directory):
            os.mkdir(directory)
        # Get rid of an undersized leftover before downloading again.
        RemoveSmallFileIfExists(saved_games_bundle)

        url = IsotropicGamesCollectionUrl(cur_date)

        # Download the bundle straight to its destination path.
        print 'getting', saved_games_bundle, 'at', url
        filename, headers = MyURLOpener().retrieve(url, saved_games_bundle)

        # Fixed 5 second pause after each download (presumably to go easy
        # on the remote server -- confirm).
        time.sleep(5)
        # Enter the day's directory and build the tar command that extracts
        # the bundle there.
        # NOTE(review): the working directory is changed inside the loop;
        # the code that runs cmd and returns to the parent directory is not
        # visible in this excerpt.
        os.chdir(directory)
        cmd = 'tar -xjvf ' + games_short_name
Example #4
0
        os.unlink(fn)


# Script body: for each date in range, fetch that day's games bundle into
# its own directory unless a large-enough copy is already present.
# NOTE(review): the end of the URL loop is outside this excerpt.
args = parser.parse_args()

# Dates come from utils.daterange, starting at 2010-10-15 and bounded by
# today's date.
for cur_date in utils.daterange(datetime.date(2010, 10, 15),
                                datetime.date.today()):
    # Dates are keyed as YYYYMMDD strings.
    str_date = time.strftime("%Y%m%d", cur_date.timetuple())
    # Honour the date restriction given on the command line.
    if not utils.IncludesDay(args, str_date):
        print 'skipping', str_date, 'because not in cmd line arg daterange'
        continue
    # Each day gets its own directory named after the date, holding
    # <date>.all.tar.bz2.
    directory = str_date
    print str_date
    games_short_name = str_date + '.all.tar.bz2'
    saved_games_bundle = directory + '/' + games_short_name
    # A bundle of at least SMALL_FILE_SIZE is treated as already downloaded.
    if utils.at_least_as_big_as(saved_games_bundle, SMALL_FILE_SIZE):
        print 'skipping because exists', str_date, saved_games_bundle, \
            'and not small (size=', os.stat(saved_games_bundle).st_size, ')'
    else:
        if not os.path.exists(directory):
            os.mkdir(directory)
        # Get rid of an undersized leftover before downloading again.
        RemoveSmallFileIfExists(saved_games_bundle)

        # Candidate sources, listed in the order they are tried:
        # Councilroom first, then Isotropic.
        urls_by_priority = [
            CouncilroomGamesCollectionUrl(cur_date),
            IsotropicGamesCollectionUrl(cur_date)
        ]

        for url in urls_by_priority:
            print 'getting', saved_games_bundle, 'at', url
            # Reads the whole response body into memory.
            contents = urllib.urlopen(url).read()