import argparse

import common_io


# Entry point: optionally regenerate the raw data, then prepare it and save the parameter dict.
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--make_data', action='store_true')
    args = parser.parse_args()
    if args.make_data:
        make_data(get_base_param())
    prepare(get_base_param())
    # save_param
    common_io.dump_to_file(get_base_param()['param_save_path'], get_base_param())
    print 'have a good day'
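# common_io is a project-local helper and its implementation is not shown here.
# A minimal sketch of what dump_to_file / load_from_file could look like,
# assuming plain pickle serialization (the real module may differ):
import cPickle as pickle

def dump_to_file(path, obj):
    # Serialize obj to path.
    with open(path, 'wb') as f:
        pickle.dump(obj, f, protocol=pickle.HIGHEST_PROTOCOL)

def load_from_file(path):
    # Load an object previously written by dump_to_file.
    with open(path, 'rb') as f:
        return pickle.load(f)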
# Split the labeled lines into train/val sets, keyed by output file name.
data_set = {}
data_set['image_text.train'] = label_list1
data_set['image_text.val'] = label_list2

# Each label line is "image\tcaption"; collect the caption part for the vocabulary.
vocab_list = []
for vac in label_list1 + label_list2:
    tmp = vac.split('\t')
    if len(tmp) > 1:
        vocab_list.append(tmp[1])
    else:
        vocab_list.append("")

# Build the word vocabulary from the captions and save it.
misc = statistic_vocabulary.preProBuildWordVocab(vocab_list)
common_io.dump_to_file(vocab_out, misc)
# misc = common_io.load_from_file(vocab_out)
wordtoix = misc['wordtoix']
print "vocab size"
print len(wordtoix)

# For each split, build the index-encoded labels to write to ix_label_out_file.
for file_name, label_list in data_set.items():
    print 'Starting %s' % file_name
    ix_label_out_file = nlpcaffe_data_out_dir + file_name
    ix_label_out = []
    for i in xrange(len(label_list)):
        labels = label_list[i]
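# Illustration of the assumed label-line format consumed above: an image path
# and its caption separated by a tab (the path and caption below are made up).
example_line = "images/0001.jpg\ta man riding a horse"
example_caption = example_line.split('\t')[1]   # -> "a man riding a horse"
# A caption can then be mapped to word indices with the wordtoix table, e.g.
# [wordtoix[w] for w in example_caption.split()], assuming every word is known.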
import common_io
from statistic_vocabulary import preProBuildWordVocab


def main():
    # Read the raw caption lines, build the word vocabulary, and save it.
    label_list = common_io.read_txt_lines(text_file_in, remove_space=True)
    misc = preProBuildWordVocab(label_list, word_count_threshold)
    common_io.dump_to_file(vocab_out, misc)
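# preProBuildWordVocab lives in statistic_vocabulary and is not shown in this
# file. A minimal sketch of what such a builder typically does: count words,
# drop those below word_count_threshold, and build word<->index maps. This is
# an assumption about its behavior, not the project's actual implementation.
def preProBuildWordVocab_sketch(sentences, word_count_threshold=1):
    word_counts = {}
    for sent in sentences:
        for w in sent.lower().split():
            word_counts[w] = word_counts.get(w, 0) + 1
    vocab = [w for w in word_counts if word_counts[w] >= word_count_threshold]
    wordtoix = {}
    ixtoword = {}
    for ix, w in enumerate(vocab):
        wordtoix[w] = ix
        ixtoword[ix] = w
    return {'wordtoix': wordtoix, 'ixtoword': ixtoword}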