def get_we_parameter(vocab, w2v_file):
    """Build an initial word-embedding matrix from pre-trained vectors.

    For every index ``i`` in ``range(len(vocab))`` the word
    ``vocab.idx2word[i]`` is looked up in the ``BigFile`` opened on
    ``w2v_file``. When the lookup fails for any reason, the row is filled
    with values drawn uniformly from [-1, 1) instead.

    Args:
        vocab: object supporting ``len()`` and exposing ``idx2word``,
            indexable by integer position.
        w2v_file: location passed straight to ``BigFile``.

    Returns:
        ``np.array`` built from one vector per vocabulary entry; each
        fallback vector has ``w2v_reader.ndims`` elements.
    """
    w2v_reader = BigFile(w2v_file)
    ndims = w2v_reader.ndims

    we = []
    for i in range(len(vocab)):
        try:
            vec = w2v_reader.read_one(vocab.idx2word[i])
        except Exception:
            # Presumably the word is missing from the pre-trained file
            # (TODO confirm which exception BigFile raises). Catching
            # Exception rather than using a bare ``except`` keeps
            # KeyboardInterrupt / SystemExit from being swallowed.
            vec = np.random.uniform(-1, 1, ndims)
        we.append(vec)

    print('getting pre-trained parameter for word embedding initialization', np.shape(we))
    return np.array(we)
def process(options, collection, featname_1, featname_2, sub_collections,
            link_name='resnext101-resnet152'):
    """Concatenate two features per item and store the result for ``collection``.

    For every sub-collection (``sub_collections`` is an ``'@'``-separated
    string) and every name listed by the ``featname_1`` feature file, the
    vectors of ``featname_1`` and ``featname_2`` are concatenated and
    written as one text line to
    ``<rootpath>/<collection>/FeatureData/<featname_1>_<featname_2>/id.feature.txt``.
    The text file is then handed to ``txt2bin`` and a symbolic link named
    ``link_name`` is created next to the target directory.

    Args:
        options: object providing ``rootpath`` and ``overwrite``.
        collection: name of the collection receiving the merged feature.
        featname_1: name of the first feature (its names drive the loop).
        featname_2: name of the second feature; read by the same names.
        sub_collections: ``'@'``-separated names of source collections.
        link_name: name of the symlink created in ``FeatureData``;
            defaults to the previously hard-coded value.

    Raises:
        ValueError: if no feature was written, so the dimensionality
            needed by ``txt2bin`` is unknown.

    Note:
        Calls ``sys.exit(0)`` when the target directory already exists and
        ``options.overwrite`` is false.
    """
    rootpath = options.rootpath
    target_feat_dir = os.path.join(
        rootpath, collection, 'FeatureData', featname_1 + "_" + featname_2)

    if os.path.exists(target_feat_dir):
        if options.overwrite:
            logger.info('%s exists! overwrite.', target_feat_dir)
        else:
            logger.info('%s exists! quit.', target_feat_dir)
            sys.exit(0)
    else:
        os.makedirs(target_feat_dir)

    target_feat_file = os.path.join(target_feat_dir, 'id.feature.txt')
    sub_collections = sub_collections.split('@')

    feat_dim = None  # length of the most recently written merged vector
    with open(target_feat_file, 'w') as fw_feat:
        for collect in sub_collections:
            feat_dir_1 = os.path.join(rootpath, collect, 'FeatureData', featname_1)
            feat_dir_2 = os.path.join(rootpath, collect, 'FeatureData', featname_2)
            featfile_1 = BigFile(feat_dir_1)
            featfile_2 = BigFile(feat_dir_2)

            print(">>> Process %s" % collect)
            progbar = Progbar(len(featfile_1.names))
            for name in featfile_1.names:
                feat_1 = featfile_1.read_one(name)
                feat_2 = featfile_2.read_one(name)
                # list() guarantees concatenation even if read_one returns
                # an array-like whose ``+`` would add element-wise.
                merged = list(feat_1) + list(feat_2)
                fw_feat.write('%s %s\n' % (name, ' '.join('%g' % x for x in merged)))
                feat_dim = len(merged)
                progbar.add(1)

    if feat_dim is None:
        raise ValueError(
            'no features found for %s/%s in sub-collections %s'
            % (featname_1, featname_2, '@'.join(sub_collections)))

    # transform txt to bin format (must run after the text file is closed)
    txt2bin(feat_dim, target_feat_file, target_feat_dir, options.overwrite)

    ln_target_feat_dir = os.path.join(rootpath, collection, 'FeatureData', link_name)
    # os.symlink avoids building a shell command from path strings, which
    # breaks on spaces or shell metacharacters. Linking stays best-effort,
    # as it was with the unchecked os.system call.
    if os.path.lexists(ln_target_feat_dir):
        logger.info('%s exists! skip linking.', ln_target_feat_dir)
    else:
        try:
            os.symlink(target_feat_dir, ln_target_feat_dir)
        except OSError as err:
            logger.warning('failed to link %s -> %s: %s',
                           ln_target_feat_dir, target_feat_dir, err)
def process(options, collection, featname, sub_collections, set_style):
    """Merge one feature from several sub-collections into ``collection``.

    ``sub_collections`` is an ``'@'``-separated string. Every vector of
    ``featname`` found in each sub-collection is written as a text line to
    ``<rootpath>/<collection>/FeatureData/<featname>/id.feature.txt`` and
    all names are written, space-separated, to ``id.txt`` in the same
    directory. Afterwards the names are written one per line to
    ``<rootpath>/<collection>/<set_style>/<collection>.txt`` unless that
    file already exists.

    Returns:
        ``0`` when the image list file already exists, otherwise ``None``.

    Note:
        Calls ``sys.exit(0)`` when the target feature directory already
        exists and ``options.overwrite`` is false.
    """
    root = options.rootpath
    out_dir = os.path.join(root, collection, 'FeatureData', featname)
    img_list_path = os.path.join(root, collection, set_style, collection + '.txt')

    # Create the output directory, or decide whether an existing one may be reused.
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)
    elif options.overwrite:
        logger.info('%s exists! overwrite.', out_dir)
    else:
        logger.info('%s exists! quit.', out_dir)
        sys.exit(0)

    feat_txt_path = os.path.join(out_dir, 'id.feature.txt')
    id_txt_path = os.path.join(out_dir, 'id.txt')
    parts = sub_collections.split('@')

    all_ids = []
    with open(feat_txt_path, 'w') as feat_out, open(id_txt_path, 'w') as id_out:
        for sub in parts:
            reader = BigFile(os.path.join(root, sub, 'FeatureData', featname))

            print(">>> Process %s" % sub)
            bar = Progbar(len(reader.names))
            for item in reader.names:
                values = reader.read_one(item)
                formatted = ' '.join('%g' % v for v in values)
                feat_out.write('%s %s\n' % (item, formatted))
                bar.add(1)
            all_ids.extend(reader.names)

        id_out.write(' '.join(all_ids))

    # Never clobber an existing image list.
    if os.path.exists(img_list_path):
        logger.info('%s exists! quit.', img_list_path)
        return 0

    img_list_dir = os.path.dirname(img_list_path)
    if not os.path.exists(img_list_dir):
        os.makedirs(img_list_dir)
    with open(img_list_path, 'w') as img_out:
        img_out.write('\n'.join(all_ids) + '\n')