Example #1
def npc_descriptions():
    CATEGORIES = (
        ("base", NpcDescription.BASE),
        ("body", NpcDescription.BODY),
        ("conflict_style_physical", NpcDescription.CONFLICT_PHYSICAL),
        ("conflict_style_verbal", NpcDescription.CONFLICT_VERBAL),
        ("disability", NpcDescription.DISABILITY),
        ("emotional_expressions", NpcDescription.EXPRESSION),
        ("face", NpcDescription.FACE),
        ("hair", NpcDescription.HAIR),
        ("marks", NpcDescription.MARK),
        ("other", NpcDescription.OTHER),
        ("personality_quirks", NpcDescription.PERSONALITY_QUIRKS),
        ("physical_skills", NpcDescription.PHYSICAL_SKILLS),
    )

    for filename, category in CATEGORIES:
        with open("scrapers/looks/%s.txt" % filename, "r") as f:
            print("adding %s" % filename)
            lines = f.readlines()
            for chunk in chunks(lines, 1000):
                with transaction.atomic():
                    for line in chunk:
                        NpcDescription.objects.get_or_create(
                            text=line.replace("\n", ""),
                            category=category,
                        )
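
Every example on this page relies on a chunks helper that splits a list into fixed-size batches but is not shown here. A minimal sketch of such a helper, assuming it simply yields consecutive slices of at most n items (the signature is inferred from the call sites, not taken from any of the original projects):

def chunks(items, n):
    # Yield successive slices of at most n elements from the input list.
    for i in range(0, len(items), n):
        yield items[i:i + n]

Each example then iterates over these slices to keep every batch small enough for a single database transaction, Firehose request, worker task, or tfrecords file.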
Example #2

def send_to_firehose(records):
    # records should be a list of dicts
    if isinstance(records, list):
        # batch up the list below the limits of firehose
        for batch in chunks(records, FIREHOSE_BATCH_SIZE):
            response = f_hose.put_record_batch(
                DeliveryStreamName=FIREHOSE_DELIVERY_STREAM,
                Records=[{
                    'Data': (json.dumps(record) + '\n').encode('UTF-8')
                } for record in batch])
            logger.debug('firehose response is: {}'.format(response))
Example #3

    @classmethod
    def load_cities_into_db(cls):
        countries_and_cities = cls.get_countries_and_cities()

        # Get chunks of the loaded data for multiprocessing
        chunked_countries_and_cities = chunks(countries_and_cities, 1000)

        pool = mp.Pool(mp.cpu_count())

        # Dispatch each chunk to the pool so the inserts run in parallel
        for chunk in chunked_countries_and_cities:
            pool.apply_async(persist_cities_data_into_db, args=(chunk, ))

        pool.close()
        pool.join()
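
One caveat with the pattern above: apply_async returns an AsyncResult whose exception is silently dropped unless .get() is called on it. A hedged variant that keeps the handles so worker errors surface, reusing the page's persist_cities_data_into_db and chunks helpers (neither is shown here):

import multiprocessing as mp

def load_cities_into_db_checked(countries_and_cities):
    # Hypothetical variant: keep the AsyncResult handles returned by apply_async
    # so that exceptions raised inside the workers are re-raised here.
    pool = mp.Pool(mp.cpu_count())
    results = [
        pool.apply_async(persist_cities_data_into_db, args=(chunk, ))
        for chunk in chunks(countries_and_cities, 1000)
    ]
    pool.close()
    pool.join()
    for result in results:
        result.get()  # raises if the corresponding worker failed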
Example #4
def send_to_firehose(records):
    f_hose = boto3.client("firehose")

    # records should be a list of dicts
    response = None
    if isinstance(records, list):
        # batch up the list below the limits of firehose
        for batch in chunks(records, FIREHOSE_BATCH_SIZE):
            response = f_hose.put_record_batch(
                DeliveryStreamName=FIREHOSE_DELIVERY_STREAM,
                Records=[{
                    "Data": (json.dumps(record) + "\n").encode("UTF-8")
                } for record in batch],
            )
            logger.debug("firehose response is: {}".format(response))
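
put_record_batch reports per-record failures in its response instead of raising, so a caller that cares about delivery should inspect FailedPutCount. A small, hypothetical helper (retry handling is left out) that could be called with each response from the loop above:

def log_failed_firehose_records(response):
    # Hypothetical helper: failed entries in RequestResponses carry an
    # ErrorCode/ErrorMessage instead of a RecordId.
    if response.get("FailedPutCount", 0):
        failures = [r for r in response["RequestResponses"] if "ErrorCode" in r]
        logger.warning("firehose rejected %d of %d records: %s",
                       response["FailedPutCount"],
                       len(response["RequestResponses"]),
                       failures[:3])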
Example #5
def race_names():
    PREFIXES = [
        'fng',
    ]
    ALL_FILES = os.listdir('scrapers/names/')

    for group in Group.objects.filter(category=Group.RACE):
        categories = [Name.MALE, Name.FEMALE, Name.SURNAME]
        for value, name in Name.CATEGORIES:
            if group.names.filter(category=value).count() > 1000:
                continue
            if value not in categories:
                continue

            if name == "Surname":
                filename = ("%s_surnames.txt" % group.name).lower()
            else:
                filename = ("%s_%s_names.txt" % (group.name, name)).lower()
            find_files = [filename]
            find_files += ["%s_%s" % (pre, filename) for pre in PREFIXES]
            files = [name for name in find_files if name in ALL_FILES]
            if not files:
                print(group.name)
                continue

            for f in files:
                with open('scrapers/names/' + f, "r") as names_f:
                    names = names_f.readlines()

                counter = 0
                for chunk in chunks(names, 1000):
                    # only import the first six chunks (at most 6,000 names) per file
                    if counter > 5:
                        continue
                    counter += 1
                    with transaction.atomic():
                        for n in chunk:
                            Name.objects.get_or_create(
                                group=group,
                                category=value,
                                name=n.replace("\n", ""),
                            )
                print("added %s" % f)
Example #6

def save_avi_to_tfrecords(source_path,
                          destination_path,
                          videos_per_file=FLAGS.num_videos,
                          type=FLAGS.type,
                          video_filenames=None,
                          dense_optical_flow=False):
    """calls sub-functions convert_avi_to_numpy and save_numpy_to_tfrecords in order to directly export tfrecords files
  :param source_path: directory where avi videos are stored
  :param destination_path: directory where tfrecords should be stored
  :param videos_per_file: specifies the number of videos within one tfrecords file
  :param use_meta: boolean that indicates whether to use meta information
  """
    global NUM_CHANNELS_VIDEO
    assert (NUM_CHANNELS_VIDEO == 3 and not dense_optical_flow) or (
        NUM_CHANNELS_VIDEO == 4 and dense_optical_flow
    ), "NUM_CHANNELS_VIDEO must be 3 without dense optical flow or 4 with it"
    assert type in ALLOWED_TYPES, str(type) + " is not an allowed type"

    if video_filenames is not None:
        filenames = video_filenames
    else:
        filenames = gfile.Glob(os.path.join(source_path, FILE_FILTER))
    if not filenames:
        raise RuntimeError('No data files found.')

    print('Total videos found: ' + str(len(filenames)))

    filenames_split = list(chunks(filenames, videos_per_file))

    if type == 'activity_net':
        meta_dict = create_activity_net_metadata_dicts(FLAGS.source,
                                                       METADATA_SUBCLIPS_DICT,
                                                       METADATA_TAXONOMY_DICT,
                                                       FILE_FILTER)
    elif type == 'youtube8m':
        meta_dict = create_youtube8m_metadata_dicts(FLAGS.source,
                                                    METADATA_DICT, FILE_FILTER)
    elif type == '20bn_train':
        meta_dict = create_20bn_metadata_dicts(FLAGS.source, CSV_20BN_TRAIN,
                                               FILE_FILTER)
    elif type == '20bn_valid':
        meta_dict = create_20bn_metadata_dicts(FLAGS.source, CSV_20BN_VALID,
                                               FILE_FILTER)
    elif type == 'UCF101':
        meta_dict = create_ucf101_metadata_dicts(FLAGS.source,
                                                 METADATA_DICT_UCF101,
                                                 FILE_FILTER)
    else:
        meta_dict = None

    for i, batch in enumerate(filenames_split):
        data, meta_info = convert_avi_to_numpy(
            batch,
            type=type,
            meta_dict=meta_dict,
            dense_optical_flow=dense_optical_flow)
        total_batch_number = int(math.ceil(len(filenames) / videos_per_file))
        print('Batch ' + str(i + 1) + '/' + str(total_batch_number))
        save_numpy_to_tfrecords(data, destination_path, meta_info,
                                'train_blobs_batch_', videos_per_file, i + 1,
                                total_batch_number)