def npc_descriptions():
    CATEGORIES = (
        ("base", NpcDescription.BASE),
        ("body", NpcDescription.BODY),
        ("conflict_style_physical", NpcDescription.CONFLICT_PHYSICAL),
        ("conflict_style_verbal", NpcDescription.CONFLICT_VERBAL),
        ("disability", NpcDescription.DISABILITY),
        ("emotional_expressions", NpcDescription.EXPRESSION),
        ("face", NpcDescription.FACE),
        ("hair", NpcDescription.HAIR),
        ("marks", NpcDescription.MARK),
        ("other", NpcDescription.OTHER),
        ("personality_quirks", NpcDescription.PERSONALITY_QUIRKS),
        ("physical_skills", NpcDescription.PHYSICAL_SKILLS),
    )
    for filename, category in CATEGORIES:
        with open("scrapers/looks/%s.txt" % filename, "r") as f:
            print("adding %s" % filename)
            lines = f.readlines()
            for chunk in chunks(lines, 1000):
                with transaction.atomic():
                    for line in chunk:
                        NpcDescription.objects.get_or_create(
                            text=line.replace("\n", ""), category=category)

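These loaders all call a chunks(seq, n) helper that is not defined in this section; under the assumption that it simply yields successive fixed-size slices of a sequence, a minimal sketch would be:

def chunks(seq, n):
    # Assumed helper (not shown in this section): yield successive n-sized slices of seq.
    for i in range(0, len(seq), n):
        yield seq[i:i + n]
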
def send_to_firehose(records):
    # records should be a list of dicts
    # f_hose is assumed to be a module-level boto3 Firehose client (see the variant below)
    if isinstance(records, list):
        # batch up the list below the limits of firehose
        for batch in chunks(records, FIREHOSE_BATCH_SIZE):
            response = f_hose.put_record_batch(
                DeliveryStreamName=FIREHOSE_DELIVERY_STREAM,
                Records=[{
                    'Data': (json.dumps(record) + '\n').encode('UTF-8')
                } for record in batch])
            logger.debug('firehose response is: {}'.format(response))

def load_cities_into_db(cls):
    countries_and_cities = cls.get_countries_and_cities()
    # Split the loaded data into 1000-item chunks for multiprocessing
    chunked_countries_and_cities = chunks(countries_and_cities, 1000)
    pool = mp.Pool(mp.cpu_count())
    # Persist the chunks in parallel; the async results are discarded
    [
        pool.apply_async(persist_cities_data_into_db, args=(chunk, ))
        for chunk in chunked_countries_and_cities
    ]
    pool.close()
    pool.join()

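Because the apply_async results above are thrown away, an exception raised inside persist_cities_data_into_db would pass silently. A hedged variant of the same dispatch step (assuming the same helper and chunk list) that surfaces worker errors could look like:

    results = [
        pool.apply_async(persist_cities_data_into_db, args=(chunk, ))
        for chunk in chunked_countries_and_cities
    ]
    pool.close()
    pool.join()
    for result in results:
        result.get()  # re-raises any exception raised inside the worker
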
def send_to_firehose(records):
    f_hose = boto3.client("firehose")
    # records should be a list of dicts
    response = None
    if isinstance(records, list):
        # batch up the list below the limits of firehose
        for batch in chunks(records, FIREHOSE_BATCH_SIZE):
            response = f_hose.put_record_batch(
                DeliveryStreamName=FIREHOSE_DELIVERY_STREAM,
                Records=[{
                    "Data": (json.dumps(record) + "\n").encode("UTF-8")
                } for record in batch],
            )
    logger.debug("firehose response is: {}".format(response))

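Both Firehose variants assume module-level configuration that is not shown here. A minimal sketch of those assumed names follows; the stream name and batch size are hypothetical placeholders, though the PutRecordBatch API itself accepts at most 500 records per call:

import json
import logging

import boto3

FIREHOSE_BATCH_SIZE = 500  # assumed value; PutRecordBatch caps a batch at 500 records
FIREHOSE_DELIVERY_STREAM = "example-delivery-stream"  # hypothetical stream name
logger = logging.getLogger(__name__)
f_hose = boto3.client("firehose")  # module-level client used by the first variant
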
def race_names():
    PREFIXES = [
        'fng',
    ]
    ALL_FILES = os.listdir('scrapers/names/')
    for group in Group.objects.filter(category=Group.RACE):
        categories = [Name.MALE, Name.FEMALE, Name.SURNAME]
        for value, name in Name.CATEGORIES:
            # Skip categories that are already well populated or out of scope
            if group.names.filter(category=value).count() > 1000:
                continue
            if value not in categories:
                continue
            if name == "Surname":
                filename = ("%s_surnames.txt" % group.name).lower()
            else:
                filename = ("%s_%s_names.txt" % (group.name, name)).lower()
            find_files = [filename]
            find_files += ["%s_%s" % (pre, filename) for pre in PREFIXES]
            files = [fn for fn in find_files if fn in ALL_FILES]
            if not files:
                # No name file found for this group
                print(group.name)
                continue
            for f in files:
                with open('scrapers/names/' + f, "r") as names_f:
                    names = names_f.readlines()
                counter = 0
                for chunk in chunks(names, 1000):
                    # Only the first six 1000-name chunks of each file are loaded
                    if counter > 5:
                        continue
                    counter += 1
                    with transaction.atomic():
                        for n in chunk:
                            Name.objects.get_or_create(
                                group=group,
                                category=value,
                                name=n.replace("\n", ""))
                print("added %s" % f)

def save_avi_to_tfrecords(source_path,
                          destination_path,
                          videos_per_file=FLAGS.num_videos,
                          type=FLAGS.type,
                          video_filenames=None,
                          dense_optical_flow=False):
    """calls sub-functions convert_avi_to_numpy and save_numpy_to_tfrecords in order to directly export tfrecords files

    :param source_path: directory where avi videos are stored
    :param destination_path: directory where tfrecords should be stored
    :param videos_per_file: specifies the number of videos within one tfrecords file
    :param type: dataset type, one of ALLOWED_TYPES; selects how the metadata dict is built
    :param video_filenames: optional explicit list of video files to convert
    :param dense_optical_flow: whether a dense optical flow channel is included per frame
    """
    global NUM_CHANNELS_VIDEO
    assert (NUM_CHANNELS_VIDEO == 3 and (not dense_optical_flow)) or (
        NUM_CHANNELS_VIDEO == 4 and dense_optical_flow), "correct NUM_CHANNELS_VIDEO"
    assert type in ALLOWED_TYPES, str(type) + " is not an allowed type"

    if video_filenames is not None:
        filenames = video_filenames
    else:
        filenames = gfile.Glob(os.path.join(source_path, FILE_FILTER))
    if not filenames:
        raise RuntimeError('No data files found.')
    print('Total videos found: ' + str(len(filenames)))
    filenames_split = list(chunks(filenames, videos_per_file))

    # Build the metadata lookup appropriate for the selected dataset type
    if type == 'activity_net':
        meta_dict = create_activity_net_metadata_dicts(
            FLAGS.source, METADATA_SUBCLIPS_DICT, METADATA_TAXONOMY_DICT,
            FILE_FILTER)
    elif type == 'youtube8m':
        meta_dict = create_youtube8m_metadata_dicts(FLAGS.source, METADATA_DICT,
                                                    FILE_FILTER)
    elif type == '20bn_train':
        meta_dict = create_20bn_metadata_dicts(FLAGS.source, CSV_20BN_TRAIN,
                                               FILE_FILTER)
    elif type == '20bn_valid':
        meta_dict = create_20bn_metadata_dicts(FLAGS.source, CSV_20BN_VALID,
                                               FILE_FILTER)
    elif type == 'UCF101':
        meta_dict = create_ucf101_metadata_dicts(FLAGS.source,
                                                 METADATA_DICT_UCF101,
                                                 FILE_FILTER)
    else:
        meta_dict = None

    # Convert each batch of videos and write it to its own tfrecords file
    for i, batch in enumerate(filenames_split):
        data, meta_info = convert_avi_to_numpy(
            batch, type=type, meta_dict=meta_dict,
            dense_optical_flow=dense_optical_flow)
        total_batch_number = int(math.ceil(len(filenames) / videos_per_file))
        print('Batch ' + str(i + 1) + '/' + str(total_batch_number))
        save_numpy_to_tfrecords(data, destination_path, meta_info,
                                'train_blobs_batch_', videos_per_file, i + 1,
                                total_batch_number)