Ejemplo n.º 1
0
def get_we_parameter(vocab, w2v_file):
    """Build the initial word-embedding matrix for a vocabulary.

    For every index ``i`` in ``range(len(vocab))`` the vector stored in
    ``w2v_file`` for ``vocab.idx2word[i]`` is looked up. When the lookup
    fails for any reason, a vector drawn uniformly from [-1, 1) is used
    instead, so every index always gets a row.

    Args:
        vocab: object supporting ``len()`` and exposing ``idx2word``, an
            index-to-word lookup.
        w2v_file: location of the pre-trained vectors, passed to ``BigFile``.

    Returns:
        A numpy array holding one vector per vocabulary index, in index order.
    """
    w2v_reader = BigFile(w2v_file)
    ndims = w2v_reader.ndims

    we = []
    # Indexing by position (rather than iterating ``vocab``) keeps row i
    # aligned with word index i regardless of how ``vocab`` iterates.
    for i in range(len(vocab)):
        try:
            vec = w2v_reader.read_one(vocab.idx2word[i])
        except Exception:
            # Deliberate best-effort fallback for words without a usable
            # pre-trained vector. ``Exception`` (not a bare ``except``) so
            # that Ctrl-C and interpreter shutdown are not swallowed.
            vec = np.random.uniform(-1, 1, ndims)
        we.append(vec)

    # Convert once and reuse the array for both the report and the result.
    we = np.array(we)
    print('getting pre-trained parameter for word embedding initialization', we.shape)
    return we
Ejemplo n.º 2
0
def process(options, collection, featname_1, featname_2, sub_collections):
    """Concatenate two features per item and store the result for ``collection``.

    For every sub-collection, each name listed by the ``featname_1`` feature
    file is read from both feature files; the two vectors are joined end to
    end and written as one text line to
    ``<rootpath>/<collection>/FeatureData/<featname_1>_<featname_2>/id.feature.txt``.
    The text file is then converted with ``txt2bin`` and a symbolic link
    named ``resnext101-resnet152`` is created next to the target directory.

    Args:
        options: object providing ``rootpath`` and ``overwrite``.
        collection: name of the collection that receives the merged feature.
        featname_1: name of the first feature (its name list drives the loop).
        featname_2: name of the second feature.
        sub_collections: ``'@'``-separated names of the source collections.

    Raises:
        ValueError: if no feature was found in any sub-collection, so the
            feature dimension is unknown.

    Note:
        Calls ``sys.exit(0)`` when the target directory already exists and
        ``options.overwrite`` is false.
    """
    rootpath = options.rootpath
    target_feat_dir = os.path.join(rootpath, collection, 'FeatureData', featname_1+"_"+featname_2)

    if os.path.exists(target_feat_dir):
        if options.overwrite:
            logger.info('%s exists! overwrite.', target_feat_dir)
        else:
            logger.info('%s exists! quit.', target_feat_dir)
            sys.exit(0)
    else:
        os.makedirs(target_feat_dir)

    target_feat_file = os.path.join(target_feat_dir, 'id.feature.txt')
    # Dimension of the concatenated feature; stays None until a row is written.
    ndims = None

    with open(target_feat_file, 'w') as fw_feat:
        for collect in sub_collections.split('@'):
            feat_dir_1 = os.path.join(rootpath, collect, 'FeatureData', featname_1)
            feat_dir_2 = os.path.join(rootpath, collect, 'FeatureData', featname_2)
            featfile_1 = BigFile(feat_dir_1)
            featfile_2 = BigFile(feat_dir_2)

            print(">>> Process %s" % collect)
            progbar = Progbar(len(featfile_1.names))
            for name in featfile_1.names:
                feat_1 = featfile_1.read_one(name)
                feat_2 = featfile_2.read_one(name)
                # list() forces concatenation even if read_one returns an
                # array type for which ``+`` would add element-wise.
                feat = list(feat_1) + list(feat_2)
                ndims = len(feat)
                fw_feat.write('%s %s\n' % (name, ' '.join(['%g'%x for x in feat])))
                progbar.add(1)

    if ndims is None:
        # Previously this surfaced as an UnboundLocalError on the loop
        # variables; report the real cause instead.
        raise ValueError('no features found in sub-collections %r' % (sub_collections,))

    # transform txt to bin format
    txt2bin(ndims, target_feat_file, target_feat_dir, options.overwrite)

    ln_target_feat_dir = os.path.join(rootpath, collection, 'FeatureData', 'resnext101-resnet152')
    # os.symlink avoids building a shell command from unquoted paths.
    # Linking stays best-effort: a failure (e.g. the link already exists)
    # is logged rather than aborting after the features were written.
    try:
        os.symlink(target_feat_dir, ln_target_feat_dir)
    except OSError as err:
        logger.warning('could not link %s -> %s: %s', ln_target_feat_dir, target_feat_dir, err)
Ejemplo n.º 3
0
def process(options, collection, featname, sub_collections, set_style):
    """Merge one feature from several sub-collections into ``collection``.

    Writes, under ``<rootpath>/<collection>/FeatureData/<featname>``:
      * ``id.feature.txt`` - one ``<name> <v1> <v2> ...`` line per item, and
      * ``id.txt`` - all item names joined by single spaces.
    Afterwards the names are also written, one per line, to
    ``<rootpath>/<collection>/<set_style>/<collection>.txt`` unless that
    file already exists.

    Args:
        options: object providing ``rootpath`` and ``overwrite``.
        collection: name of the collection that receives the merged feature.
        featname: name of the feature to merge.
        sub_collections: ``'@'``-separated names of the source collections.
        set_style: sub-directory of the collection holding the name list.

    Returns:
        0 when the name-list file already exists, otherwise None.

    Note:
        Calls ``sys.exit(0)`` when the target feature directory already
        exists and ``options.overwrite`` is false.
    """
    root = options.rootpath
    feature_root = os.path.join(root, collection, 'FeatureData', featname)
    image_list_path = os.path.join(root, collection, set_style,
                                   collection + '.txt')

    # Create the output directory, or decide whether an existing one is reused.
    if not os.path.exists(feature_root):
        os.makedirs(feature_root)
    elif options.overwrite:
        logger.info('%s exists! overwrite.', feature_root)
    else:
        logger.info('%s exists! quit.', feature_root)
        sys.exit(0)

    feature_txt_path = os.path.join(feature_root, 'id.feature.txt')
    id_list_path = os.path.join(feature_root, 'id.txt')
    merged_ids = []

    with open(feature_txt_path, 'w') as feature_out, \
            open(id_list_path, 'w') as id_out:
        for sub_name in sub_collections.split('@'):
            source = BigFile(
                os.path.join(root, sub_name, 'FeatureData', featname))

            print(">>> Process %s" % sub_name)
            bar = Progbar(len(source.names))
            for item_name in source.names:
                values = source.read_one(item_name)
                formatted = ' '.join(['%g' % v for v in values])
                feature_out.write('%s %s\n' % (item_name, formatted))
                bar.add(1)

            merged_ids.extend(source.names)

        id_out.write(' '.join(merged_ids))

    # An existing name list is never overwritten.
    if os.path.exists(image_list_path):
        logger.info('%s exists! quit.', image_list_path)
        return 0

    list_dir = os.path.dirname(image_list_path)
    if not os.path.exists(list_dir):
        os.makedirs(list_dir)
    with open(image_list_path, 'w') as image_out:
        image_out.write('\n'.join(merged_ids) + '\n')