Example #1
0
            X = X + tmp

            print("%s %d to %d finished" % (prefix, lo, hi))
            sys.stdout.flush()
        return X

    BOOLEAN_LOAD_PP_COOCC_FROM_FILE = True
    X, Y = None, None
    if BOOLEAN_LOAD_PP_COOCC_FROM_FILE:
        print 'Loading project project negative_co-occurrence matrix'
        t1 = time.time()
        start_idx = range(0, n_users, batch_size)
        end_idx = start_idx[1:] + [n_users]
        X = _load_coord_matrix(start_idx, end_idx, n_projects, n_projects, prefix = 'project') #project project co-occurrence matrix
        print 'dumping matrix ...'
        text_utils.save_pickle(X, os.path.join(DATA_DIR,'negative_pro_pro_cooc_fold%d.dat'%FOLD))
        t2 = time.time()
        print 'Time : %d seconds'%(t2-t1)
    else:
        print 'test loading model from pickle file'
        t1 = time.time()
        X = text_utils.load_pickle(os.path.join(DATA_DIR,'negative_pro_pro_cooc_fold%d.dat'%FOLD))
        t2 = time.time()
        print '[INFO]: sparse matrix size of project project negative_co-occurrence matrix: %d mb\n' % (
                                                        (X.data.nbytes + X.indices.nbytes + X.indptr.nbytes) / (1024 * 1024))
        print 'Time : %d seconds'%(t2-t1)

    #X = None
    BOOLEAN_LOAD_UU_COOCC_FROM_FILE = True
    if BOOLEAN_LOAD_UU_COOCC_FROM_FILE:
        print 'Loading user user negative_co-occurrence matrix'
Example #2
0
        sys.stdout.flush()
    return X


# Switch between the two ways of obtaining X_neg:
#   True  -> assemble it with _load_negative_coord_matrix over the user batch
#            ranges, then pickle it under DATA_DIR;
#   False -> read the previously pickled matrix back and report its size.
BOOLEAN_NEGATIVE_LOAD_PP_COOCC_FROM_FILE = True
X_neg = None
Y_neg = None
if not BOOLEAN_NEGATIVE_LOAD_PP_COOCC_FROM_FILE:
    print('test loading model from pickle file')
    t1 = time.time()
    X_neg = text_utils.load_pickle(
        os.path.join(DATA_DIR,
                     '%s_negative_pro_pro_cooc.dat' % NEGATIVE_SELECTION_MODE))
    t2 = time.time()
    # Size is the sum of the three buffers of the sparse matrix, in megabytes.
    print('[INFO]: sparse matrix size of project project co-occurrence matrix: %d mb\n' % (
        (X_neg.data.nbytes + X_neg.indices.nbytes + X_neg.indptr.nbytes) / (1024 * 1024)))
else:
    print('Loading negative project project co-occurrence matrix')
    t1 = time.time()
    # Batch boundaries: [0, b), [b, 2b), ..., [k*b, n_users).
    start_idx = range(0, n_users, batch_size)
    end_idx = start_idx[1:] + [n_users]
    X_neg = _load_negative_coord_matrix(
        start_idx, end_idx, n_projects, n_projects,
        prefix='%s-project' % NEGATIVE_SELECTION_MODE)
    print(X_neg)
    print('dumping matrix ...')
    text_utils.save_pickle(
        X_neg,
        os.path.join(DATA_DIR,
                     '%s_negative_pro_pro_cooc.dat' % NEGATIVE_SELECTION_MODE))
    t2 = time.time()
# Both paths finish by reporting their elapsed wall-clock time.
print('Time : %d seconds' % (t2 - t1))

# X = None
BOOLEAN_LOAD_NEGATIVE_UU_COOCC_FROM_FILE = False
if BOOLEAN_LOAD_NEGATIVE_UU_COOCC_FROM_FILE:
    print 'Loading negative user user co-occurrence matrix'
def produce_neg_embeddings(DATA_DIR,
                           train_data,
                           n_users,
                           n_items,
                           batch_size=5000,
                           iter=0):
    """Build, pickle and return the item-item negative co-occurrence matrix.

    Steps performed:
      1. delete stale ``*.npy`` files from ``<DATA_DIR>/negative-co-temp``;
      2. call ``_coord_batch`` once per user batch (presumably it writes the
         ``negative_item_coo_<lo>_<hi>.npy`` files consumed in step 3 -- its
         body is not visible from here, TODO confirm);
      3. sum the per-batch coordinate files into a single
         ``n_items x n_items`` float32 CSR matrix;
      4. pickle that matrix to
         ``<DATA_DIR>/negative_item_item_cooc_iter<iter>.dat``;
      5. delete the temporary ``*.npy`` files again.

    Args:
        DATA_DIR: directory holding the ``negative-co-temp`` folder and
            receiving the pickled matrix.
        train_data: passed through unchanged to ``_coord_batch``.
        n_users: number of users; defines the batch ranges.
        n_items: number of rows and columns of the resulting matrix.
        batch_size: number of users per batch.
        iter: iteration number embedded in the pickle file name (the name
            shadows the builtin but is kept for caller compatibility).

    Returns:
        A tuple ``(X, None)`` where ``X`` is the co-occurrence matrix.
    """
    # Single formatted string prints identically on Python 2 and Python 3.
    print('%s %s' % (n_users, n_items))

    temp_dir = os.path.join(DATA_DIR, 'negative-co-temp')
    cooc_path = os.path.join(DATA_DIR,
                             'negative_item_item_cooc_iter%d.dat' % (iter))

    def _clear_temp_files():
        # Remove every per-batch .npy file; a missing folder is left alone.
        if os.path.exists(temp_dir):
            for f in glob.glob(os.path.join(temp_dir, '*.npy')):
                os.remove(f)

    def _load_coord_matrix(start_idx, end_idx, nrow, ncol, prefix='item'):
        # Accumulate all per-batch coordinate files into one CSR matrix.
        X = sparse.csr_matrix((nrow, ncol), dtype='float32')

        for lo, hi in zip(start_idx, end_idx):
            coords = np.load(
                os.path.join(temp_dir,
                             'negative_%s_coo_%d_%d.npy' % (prefix, lo, hi)))

            # Column 0 holds row indices, column 1 holds column indices.
            rows = coords[:, 0]
            cols = coords[:, 1]

            # Repeated (row, col) pairs are summed on conversion to CSR, so
            # each stored value is the number of times the pair occurred.
            tmp = sparse.coo_matrix((np.ones_like(rows), (rows, cols)),
                                    shape=(nrow, ncol),
                                    dtype='float32').tocsr()
            X = X + tmp

            print("%s %d to %d finished" % (prefix, lo, hi))
            sys.stdout.flush()
        return X

    # Start from a clean temp folder so old batches cannot leak into the sum.
    _clear_temp_files()

    # list(...) keeps the slice-plus-list concatenation valid on Python 3,
    # where range() no longer returns a list.
    start_idx = list(range(0, n_users, batch_size))
    end_idx = start_idx[1:] + [n_users]

    # Developer toggle: generate the per-batch coordinate files.
    GENERATE_ITEM_ITEM_COOCCURENCE_FILE = True
    if GENERATE_ITEM_ITEM_COOCCURENCE_FILE:
        t1 = time.time()
        print('Generating item item negative_co-occurrence matrix')
        Parallel(n_jobs=1)(
            delayed(_coord_batch)(DATA_DIR, lo, hi, train_data, prefix='item')
            for lo, hi in zip(start_idx, end_idx))
        t2 = time.time()
        print('Time : %d seconds' % (t2 - t1))

    # Developer toggle: True assembles the matrix from the batch files and
    # pickles it; False reloads a previously pickled matrix instead.
    BOOLEAN_LOAD_PP_COOCC_FROM_FILE = True
    if BOOLEAN_LOAD_PP_COOCC_FROM_FILE:
        print('Loading item item negative_co-occurrence matrix')
        t1 = time.time()
        X = _load_coord_matrix(start_idx,
                               end_idx,
                               n_items,
                               n_items,
                               prefix='item')  # item item co-occurrence matrix
        print('dumping matrix ...')
        text_utils.save_pickle(X, cooc_path)
        t2 = time.time()
        print('Time : %d seconds' % (t2 - t1))
    else:
        print('test loading model from pickle file')
        t1 = time.time()
        X = text_utils.load_pickle(cooc_path)
        t2 = time.time()
        # Floor division gives the same whole-megabyte figure on Python 2 and 3.
        print('[INFO]: sparse matrix size of item item negative_co-occurrence matrix: %d mb\n' % (
            (X.data.nbytes + X.indices.nbytes + X.indptr.nbytes) //
            (1024 * 1024)))
        print('Time : %d seconds' % (t2 - t1))

    # The batch files are only intermediates; drop them once X exists.
    _clear_temp_files()
    return X, None
Example #4
0
    return X


# Switch between the two ways of obtaining X:
#   True  -> assemble it with _load_coord_matrix over the user batch ranges
#            and pickle it to pro_pro_cooc.dat;
#   False -> read pro_pro_cooc.dat back and report the matrix size.
BOOLEAN_LOAD_PP_COOCC_FROM_FILE = False
X = None
Y = None
if not BOOLEAN_LOAD_PP_COOCC_FROM_FILE:
    print('test loading model from pickle file')
    t1 = time.time()
    X = text_utils.load_pickle(os.path.join(DATA_DIR, 'pro_pro_cooc.dat'))
    t2 = time.time()
    # Size is the sum of the three buffers of the sparse matrix, in megabytes.
    print('[INFO]: sparse matrix size of project project co-occurrence matrix: %d mb\n' % (
        (X.data.nbytes + X.indices.nbytes + X.indptr.nbytes) / (1024 * 1024)))
else:
    print('Loading project project co-occurrence matrix')
    t1 = time.time()
    # Batch boundaries: [0, b), [b, 2b), ..., [k*b, n_users).
    start_idx = range(0, n_users, batch_size)
    end_idx = start_idx[1:] + [n_users]
    X = _load_coord_matrix(start_idx, end_idx, n_projects, n_projects,
                           prefix='project')
    print(X)
    print('dumping matrix ...')
    text_utils.save_pickle(X, os.path.join(DATA_DIR, 'pro_pro_cooc.dat'))
    t2 = time.time()
# Both paths finish by reporting their elapsed wall-clock time.
print('Time : %d seconds' % (t2 - t1))

#X = None
BOOLEAN_LOAD_UU_COOCC_FROM_FILE = False
if BOOLEAN_LOAD_UU_COOCC_FROM_FILE:
    print 'Loading user user co-occurrence matrix'
Example #5
0
    for lo, hi in zip(start_idx, end_idx):
        coords = np.load(
            os.path.join(DATA_DIR, 'negative-co-temp',
                         'negative_%s_coo_%d_%d.npy' % (prefix, lo, hi)))

        rows = coords[:, 0]
        cols = coords[:, 1]

        tmp = sparse.coo_matrix((np.ones_like(rows), (rows, cols)),
                                shape=(nrow, ncol),
                                dtype='float32').tocsr()
        X = X + tmp

        print("%s %d to %d finished" % (prefix, lo, hi))
        sys.stdout.flush()
    return X


# Assemble the item-item negative co-occurrence matrix from the per-batch
# files and pickle it to negative_item_item_cooc.dat, timing the whole step.
X = None
Y = None
print('Loading item item negative_co-occurrence matrix and saving to pickle file for fast loading')
t1 = time.time()
# Batch boundaries: [0, b), [b, 2b), ..., [k*b, n_users).
start_idx = range(0, n_users, batch_size)
end_idx = start_idx[1:] + [n_users]
X = _load_coord_matrix(
    start_idx, end_idx, n_items, n_items, prefix='item')
print('dumping matrix ...')
text_utils.save_pickle(
    X, os.path.join(DATA_DIR, 'negative_item_item_cooc.dat'))
t2 = time.time()
print('Time : %d seconds' % (t2 - t1))
Example #6
0
        print("%s %d to %d finished" % (prefix, lo, hi))
        sys.stdout.flush()
    return X


# Switch between the two ways of obtaining X:
#   True  -> assemble it with _load_coord_matrix over the user batch ranges
#            and pickle it to item_item_cooc.dat;
#   False -> read item_item_cooc.dat back and report the matrix size.
BOOLEAN_LOAD_PP_COOCC_FROM_FILE = True
X = None
Y = None
if not BOOLEAN_LOAD_PP_COOCC_FROM_FILE:
    print('test loading model from pickle file')
    t1 = time.time()
    X = text_utils.load_pickle(os.path.join(DATA_DIR, 'item_item_cooc.dat'))
    t2 = time.time()
    # Size is the sum of the three buffers of the sparse matrix, in megabytes.
    print('[INFO]: sparse matrix size of item-item co-occurrence matrix: %d mb\n' % (
        (X.data.nbytes + X.indices.nbytes + X.indptr.nbytes) / (1024 * 1024)))
else:
    print('Loading item item co-occurrence matrix')
    t1 = time.time()
    # Batch boundaries: [0, b), [b, 2b), ..., [k*b, n_users).
    start_idx = range(0, n_users, batch_size)
    end_idx = start_idx[1:] + [n_users]
    X = _load_coord_matrix(
        start_idx, end_idx, n_items, n_items, prefix='item')
    print('dumping matrix ...')
    text_utils.save_pickle(X, os.path.join(DATA_DIR, 'item_item_cooc.dat'))
    t2 = time.time()
# Both paths finish by reporting their elapsed wall-clock time.
print('Time : %d seconds' % (t2 - t1))

#X = None
BOOLEAN_LOAD_UU_COOCC_FROM_FILE = True
if BOOLEAN_LOAD_UU_COOCC_FROM_FILE:
    print 'Loading user user co-occurrence matrix'
        sys.stdout.flush()
    return X


# Switch between the two ways of obtaining X_neg:
#   True  -> assemble it with _load_negative_coord_matrix over the user batch
#            ranges (prefix 'cate-project'), then pickle it under DATA_DIR;
#   False -> read the previously pickled matrix back and report its size.
BOOLEAN_NEGATIVE_LOAD_PP_COOCC_FROM_FILE = True
X_neg = None
Y_neg = None
if not BOOLEAN_NEGATIVE_LOAD_PP_COOCC_FROM_FILE:
    print('test loading model from pickle file')
    t1 = time.time()
    X_neg = text_utils.load_pickle(
        os.path.join(DATA_DIR,
                     'cate_negative_pro_pro_cooc_%d.dat' % NEGATIVE_NEIGHBOR_WORDS))
    t2 = time.time()
    # Size is the sum of the three buffers of the sparse matrix, in megabytes.
    print('[INFO]: sparse matrix size of project project co-occurrence matrix: %d mb\n' % (
        (X_neg.data.nbytes + X_neg.indices.nbytes + X_neg.indptr.nbytes) / (1024 * 1024)))
else:
    print('Loading negative project project co-occurrence matrix')
    t1 = time.time()
    # Batch boundaries: [0, b), [b, 2b), ..., [k*b, n_users).
    start_idx = range(0, n_users, batch_size)
    end_idx = start_idx[1:] + [n_users]
    X_neg = _load_negative_coord_matrix(
        start_idx, end_idx, n_projects, n_projects,
        prefix='cate-project')
    print(X_neg)
    print('dumping matrix ...')
    text_utils.save_pickle(
        X_neg,
        os.path.join(DATA_DIR,
                     'cate_negative_pro_pro_cooc_%d.dat' % NEGATIVE_NEIGHBOR_WORDS))
    t2 = time.time()
# Both paths finish by reporting their elapsed wall-clock time.
print('Time : %d seconds' % (t2 - t1))

# X = None
BOOLEAN_LOAD_NEGATIVE_UU_COOCC_FROM_FILE = True
if BOOLEAN_LOAD_NEGATIVE_UU_COOCC_FROM_FILE:
    print 'Loading negative user user co-occurrence matrix'
Example #8
0
    return X


# Switch between the two ways of obtaining X:
#   True  -> assemble it with _load_coord_matrix over the user batch ranges
#            and pickle it to pro_pro_cooc.dat;
#   False -> read pro_pro_cooc.dat back and report the matrix size.
BOOLEAN_LOAD_PP_COOCC_FROM_FILE = False
X = None
Y = None
if not BOOLEAN_LOAD_PP_COOCC_FROM_FILE:
    print('test loading model from pickle file')
    t1 = time.time()
    X = text_utils.load_pickle(os.path.join(DATA_DIR, 'pro_pro_cooc.dat'))
    t2 = time.time()
    # Size is the sum of the three buffers of the sparse matrix, in megabytes.
    print('[INFO]: sparse matrix size of project project co-occurrence matrix: %d mb\n' % (
        (X.data.nbytes + X.indices.nbytes + X.indptr.nbytes) / (1024 * 1024)))
else:
    print('Loading project project co-occurrence matrix')
    t1 = time.time()
    # Batch boundaries: [0, b), [b, 2b), ..., [k*b, n_users).
    start_idx = range(0, n_users, batch_size)
    end_idx = start_idx[1:] + [n_users]
    X = _load_coord_matrix(start_idx, end_idx, n_projects, n_projects,
                           prefix='project')
    print(X)
    print('dumping matrix ...')
    text_utils.save_pickle(X, os.path.join(DATA_DIR, 'pro_pro_cooc.dat'))
    t2 = time.time()
# Both paths finish by reporting their elapsed wall-clock time.
print('Time : %d seconds' % (t2 - t1))

#X = None
BOOLEAN_LOAD_UU_COOCC_FROM_FILE = False
if BOOLEAN_LOAD_UU_COOCC_FROM_FILE:
    print 'Loading user user co-occurrence matrix'