Example #1
def set_libodbc_path(path):
    """
    Set the first path that GraphLab Create will search for libodbc.so.

    Since ODBC requires a driver manager to be installed system-wide, we
    provide this to help you if it is installed in a non-standard location.
    GraphLab Create will also search on the system's default library paths, so
    if you installed your driver manager in a standard way, you shouldn't need
    to worry about this function.
    """
    gl.set_runtime_config('GRAPHLAB_LIBODBC_PREFIX', path)
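# A minimal usage sketch of the helper above. The path is hypothetical; point it
# at whatever directory actually contains libodbc.so if your driver manager is
# installed in a non-standard location.
import graphlab as gl
set_libodbc_path('/opt/unixodbc/lib')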
Example #3
def load_graphlab():
    if sys.version_info >= (3, 0):
        raise VersionError("Graphlab is only available in Python 2")
    start = time.clock()  # noqa
    import graphlab
    gl_product_key = os.getenv('GLCREATE_PRODUCT_KEY', False)
    if not gl_product_key:
        print("Please set GLCREATE_PRODUCT_KEY")
        return

    graphlab.product_key.set_product_key(gl_product_key)
    # Display graphlab canvas in notebook
    graphlab.canvas.set_target('ipynb')
    # Number of workers
    graphlab.set_runtime_config('GRAPHLAB_DEFAULT_NUM_PYLAMBDA_WORKERS', 16)
    since = time.clock() - start
    print("Graphlab loaded in {:.3f} seconds".format(since))
    return graphlab
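# A hedged usage sketch of load_graphlab(). The key below is a placeholder, not a
# real product key; normally GLCREATE_PRODUCT_KEY would be exported in the shell.
import os
os.environ.setdefault('GLCREATE_PRODUCT_KEY', '<your-product-key>')
graphlab = load_graphlab()
if graphlab is not None:
    sf = graphlab.SFrame({'x': [1, 2, 3]})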
Example #4
import graphlab as gl
import numpy as np
from scipy.spatial.distance import cosine,euclidean
import time
import datetime
from operator import itemgetter
import itertools
import math
import multiprocessing as mp
gl.set_runtime_config('GRAPHLAB_DEFAULT_NUM_PYLAMBDA_WORKERS', 32)
gl.set_runtime_config('GRAPHLAB_DEFAULT_NUM_GRAPH_LAMBDA_WORKERS', 32)
gl.set_runtime_config('GRAPHLAB_FILEIO_MAXIMUM_CACHE_CAPACITY',100000000000)
gl.set_runtime_config('GRAPHLAB_FILEIO_MAXIMUM_CACHE_CAPACITY_PER_FILE',100000000000)


tfidf = False
n_iter = 50
random_sample_size=5000000
k_range = np.arange(285,301,5)
filename = 'tf'
np.random.seed(99)
n_cores = 16


docs = gl.SArray("doc_array")
if tfidf:
    docs = gl.text_analytics.tf_idf(docs)
    # round the tf-idf weights up to positive integer counts
    docs = docs.apply(lambda row: {k: round(v) + 1 for k, v in row.iteritems()})
train,test = gl.text_analytics.random_split(docs,0.1)
train.save("train_data_"+filename)
test.save("test_data_"+filename)
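# The k_range and n_iter settings above suggest a topic-model sweep over the saved
# splits. A sketch of how that might continue -- the loop and the evaluation are
# assumptions, while gl.topic_model.create and its perplexity metric are standard
# GraphLab Create APIs.
train = gl.SArray("train_data_" + filename)
test = gl.SArray("test_data_" + filename)
for k in k_range:
    model = gl.topic_model.create(train, num_topics=int(k), num_iterations=n_iter)
    print k, model.evaluate(train, test)['perplexity']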
Example #5
import graphlab as gl
import time
gl.set_runtime_config('GRAPHLAB_CACHE_FILE_LOCATIONS', '/mnt/data/tmp')
gl.set_runtime_config('GRAPHLAB_DEFAULT_NUM_GRAPH_LAMBDA_WORKERS', 30)

dpath = '/mnt/data/'


def load_data(dpath, maxn=None):
    cites = gl.SFrame.read_csv(dpath + 'cites.csv',
                               column_type_hints=[int, int])
    paper = gl.SFrame.read_csv(dpath + 'papers.csv',
                               column_type_hints=[int, int])
    if maxn is not None:
        paper = paper[paper['id'] < maxn]
        cites = cites[cites.apply(lambda x: x['p1'] < maxn and x['p2'] < maxn)]
    sg = gl.SGraph(vertices=paper,
                   edges=cites,
                   vid_field='id',
                   src_field='p1',
                   dst_field='p2')
    return sg


def findp_update_fn(src, edge, dst):
    pdst = dst['parent']
    psrc = src['parent']
    for pid, d in pdst.iteritems():
        if pid not in psrc:
            psrc[pid] = d + 1
            src['changed'] = True
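    # The snippet is truncated here. A triple_apply update function has to write the
    # mutated fields back and return the (src, edge, dst) triple, so the missing tail
    # presumably mirrors childs_update_fn further down this page -- a sketch:
    src['parent'] = psrc
    return (src, edge, dst)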
Example #6
	return merged_sf

def eval_model(model, test, col):
	'''Evaluate a trained model using Kaggle scoring.'''
	return log_loss_raw(test[col], model.predict(test, output_type='probability'))
	
def log_loss_raw(target, predicted):
	'''Calculate log_loss between target and predicted and return.'''
	p = predicted.apply(lambda x: min(0.99999, max(1e-5, x)))
	logp = p.apply(lambda x: math.log(x))
	logmp = p.apply(lambda x: (math.log(1-x)))
	return -(target * logp + (1-target) * logmp).mean()


if __name__=='__main__':
	gl.set_runtime_config('GRAPHLAB_CACHE_FILE_LOCATIONS','/home/mraza/tmp/')	
	gl.set_runtime_config('GRAPHLAB_FILEIO_MAXIMUM_CACHE_CAPACITY', 200*1024*1024*1024)
	gl.set_runtime_config('GRAPHLAB_FILEIO_MAXIMUM_CACHE_CAPACITY_PER_FILE', 100*1024*1024*1024)

	parser=argparse.ArgumentParser(description='Spectral Features Preprocessing')
	parser.add_argument('-cf','--clusteral_features',help='Input File containing clusteral features', required=True)
	parser.add_argument('-lf','--labels_file', help='Ground Truth labels file', required=True)
	parser.add_argument('-of','--output_file', help='Output file', required=True)
	parser.add_argument('-cfk','--clusteral_key_column', required=True)
	parser.add_argument('-lfk','--labels_key_column', required=True)
	parser.add_argument('-lfv','--labels_value_column', required=True)
	parser.add_argument('-i','--interaction', required=True)
	parser.add_argument('-j','--join_type', required=False)
	parser.add_argument('-e','--encode', required=False)

	args=parser.parse_args()
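	# A hedged sketch of what presumably follows the argument parsing: load both inputs
	# as SFrames and join them on the given key columns. The join below is an assumption
	# based on the argument names, not the author's confirmed logic.
	clusteral = gl.SFrame.read_csv(args.clusteral_features)
	labels = gl.SFrame.read_csv(args.labels_file)
	merged = clusteral.join(labels, on={args.clusteral_key_column: args.labels_key_column}, how=args.join_type or 'inner')
	merged.save(args.output_file)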
Example #7
# processed/ directories -- where you ran the prep_image.sh.  It will
# put a image-sframes/ directory with train and test SFrames in the
# save_path location below. 

# os.chdir('/home/pablo/Kaggle/kaggle-train')

# preprocessed_image_path = "processed/"
preprocessed_image_path = "processed/"
save_train = False
save_test = True

print "current working directory = %s" % os.getcwd()

save_path = "./"

gl.set_runtime_config("GRAPHLAB_CACHE_FILE_LOCATIONS", os.path.expanduser("~/data/tmp/"))
#gl.set_runtime_config("GRAPHLAB_CACHE_FILE_LOCATIONS", "/media/pablo/OS/Users/Pablo/Downloads/Kaggle/graphlab-cache/")

print "loading images" # las siguientes sentencias tarda en Pablo's notebook 400 segs aprox.
# shuffle the training images1
X = gl.image_analysis.load_images(preprocessed_image_path)
X["is_train"] = X["path"].apply(lambda p: "train" in p)

# Add in all the relevant information in places
source_f = lambda p: re.search("run-(?P<source>[^/]+)", p).group("source")
X["source"] = X["path"].apply(source_f)

extract_name = lambda p: re.search("[0-9]+_(right|left)", p).group(0)
X["name"] = X["path"].apply(extract_name)

X_train = X[X["is_train"] == True]
Example #8
import random
from copy import copy
import os
import graphlab.aggregate as agg
import array

import sys

model_name = "pooling-2"
which_model = 0

print "Running model %d, %s" % (which_model, model_name)

alt_path = os.path.expanduser("~/data/tmp/")
if os.path.exists(alt_path):
    gl.set_runtime_config("GRAPHLAB_CACHE_FILE_LOCATIONS", alt_path)

model_path = "nn_256x256/models/model-%d-%s/" % (which_model, model_name)
model_filename = model_path + "nn_model" 

X_train = gl.SFrame("image-sframes/train-%d/" % which_model)
X_valid = gl.SFrame("image-sframes/validation-%d/" % which_model)
X_test = gl.SFrame("image-sframes/test/")

################################################################################

# init_random vs random_type in ConvolutionLayer. 

dll = gl.deeplearning.layers

nn = gl.deeplearning.NeuralNet()
Example #9
parser.add_argument('-i',
                    '--train',
                    help='Input training matrix',
                    required=True)
parser.add_argument('-t', '--test', help='test data matrix', required=True)
parser.add_argument('-d',
                    '--modeldir',
                    help='Directory to save the model',
                    default='svmmodel')
parser.add_argument('-r',
                    '--report',
                    help='report file',
                    default='report.txt')
args = parser.parse_args()

gl.set_runtime_config('GRAPHLAB_CACHE_FILE_LOCATIONS', '/scratch')
test = gl.SFrame.read_csv(args.test, delimiter='\t', header=False)
train = gl.SFrame.read_csv(args.train, delimiter='\t', header=False)
file_report = open(args.report, 'w')

test.save('test_sframe')
train.save('train_sframe')

model = gl.svm_classifier.create(train, target='X1')
predictions = model.predict(test)
print predictions
#file_report.write(predictions)
results = model.evaluate(test)
print results
#file_report.write(results)
model.save(args.modeldir)
Example #10
def eval_model(model, test, col):
    '''Evaluate a trained model using Kaggle scoring.'''
    return log_loss_raw(test[col],
                        model.predict(test, output_type='probability'))


def log_loss_raw(target, predicted):
    '''Calculate log_loss between target and predicted and return.'''
    p = predicted.apply(lambda x: min(0.99999, max(1e-5, x)))
    logp = p.apply(lambda x: math.log(x))
    logmp = p.apply(lambda x: (math.log(1 - x)))
    return -(target * logp + (1 - target) * logmp).mean()


if __name__ == '__main__':
    gl.set_runtime_config('GRAPHLAB_CACHE_FILE_LOCATIONS', '/home/mraza/tmp/')
    gl.set_runtime_config('GRAPHLAB_FILEIO_MAXIMUM_CACHE_CAPACITY',
                          200 * 1024 * 1024 * 1024)
    gl.set_runtime_config('GRAPHLAB_FILEIO_MAXIMUM_CACHE_CAPACITY_PER_FILE',
                          100 * 1024 * 1024 * 1024)

    parser = argparse.ArgumentParser(
        description='Spectral Features Preprocessing')
    parser.add_argument('-cf',
                        '--clusteral_features',
                        help='Input File containing clusteral features',
                        required=True)
    parser.add_argument('-lf',
                        '--labels_file',
                        help='Ground Truth labels file',
                        required=True)
Example #11
import graphlab as gl
import re
import random
from copy import copy
import os
import graphlab.aggregate as agg
import array

import sys

# Change cache file directory to avoid overloading /var
my_graphlab_cache_file_locations = '/home/zak/tmp_graphlab'
gl.set_runtime_config('GRAPHLAB_CACHE_FILE_LOCATIONS',my_graphlab_cache_file_locations)

# gl.set_runtime_config("GRAPHLAB_CACHE_FILE_LOCATIONS", os.path.expanduser("~/data/tmp/"))

base_path = os.getcwd()

model_path = base_path + "/nn_96x96/models/"

train_sf = []
test_sf = []
feature_names = []

for n in [0,1,2,3,4]:
    
    try: 
        Xf_train = gl.SFrame(model_path + "/scores_train_%d" % n)
        Xf_test = gl.SFrame(model_path + "/scores_test_%d" % n)

        train_sf.append(Xf_train)
Example #12
# processed/ directories -- where you ran the prep_image.sh.  It will
# put a image-sframes/ directory with train and test SFrames in the
# save_path location below.

# os.chdir('/home/pablo/Kaggle/kaggle-train')

# preprocessed_image_path = "processed/"
preprocessed_image_path = "processed/"
save_train = False
save_test = True

print "current working directory = %s" % os.getcwd()

save_path = "./"

gl.set_runtime_config("GRAPHLAB_CACHE_FILE_LOCATIONS",
                      os.path.expanduser("~/data/tmp/"))
#gl.set_runtime_config("GRAPHLAB_CACHE_FILE_LOCATIONS", "/media/pablo/OS/Users/Pablo/Downloads/Kaggle/graphlab-cache/")

print "loading images"  # las siguientes sentencias tarda en Pablo's notebook 400 segs aprox.
# shuffle the training images1
X = gl.image_analysis.load_images(preprocessed_image_path)
X["is_train"] = X["path"].apply(lambda p: "train" in p)

# Add in all the relevant information in places
source_f = lambda p: re.search("run-(?P<source>[^/]+)", p).group("source")
X["source"] = X["path"].apply(source_f)

extract_name = lambda p: re.search("[0-9]+_(right|left)", p).group(0)
X["name"] = X["path"].apply(extract_name)

X_train = X[X["is_train"] == True]
Example #13
import graphlab as gl
import loadgraph as load
import time

gl.set_runtime_config('GRAPHLAB_CACHE_FILE_LOCATIONS', '/mnt/data/tmp')

dpath = '/mnt/data/'


def childs_update_fn(src, edge, dst):
    pdst = dst['childs']
    psrc = src['childs']
    for pid, d in psrc.iteritems():
        if pid not in pdst:
            pdst[pid] = d + 1
            dst['changed'] = True
        else:
            if pdst[pid] > d + 1:
                pdst[pid] = d + 1
                dst['changed'] = True
    dst['childs'] = pdst
    return (src, edge, dst)


def find_childs(g, maxn):
    start = time.time()
    num_changed = len(g.vertices)
    it = 0
    g.vertices['childs'] = g.vertices['__id'].apply(lambda x: {x: 0}
                                                    if x < maxn else {})
    while (num_changed > 0):
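        # The loop body is truncated. It presumably resets the change flags, runs
        # triple_apply with childs_update_fn, and counts how many vertices changed.
        # A sketch under those assumptions:
        g.vertices['changed'] = 0
        g = g.triple_apply(childs_update_fn, ['childs', 'changed'])
        num_changed = g.vertices['changed'].sum()
        it += 1
    print 'find_childs finished %d iterations in %f secs' % (it, time.time() - start)
    return g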
Example #14
import graphlab as gl
import loadgraph as load
import time

gl.set_runtime_config('GRAPHLAB_CACHE_FILE_LOCATIONS', '/mnt/data/tmp')

dpath = '/mnt/data/'


def node_update_fn(src, edge, dst):
    src['out_edges'] += 1 
    dst['in_edges'] += 1
    return (src, edge, dst)
    
def find_stats(g):
    start = time.time()
    g.vertices['in_edges'] = 0
    g.vertices['out_edges'] = 0
    g = g.triple_apply(node_update_fn, ['in_edges', 'out_edges'])
    print 'Triple apply all finished in: %f secs' % (time.time() - start)
    return g


def cnt_update_fn(src, edge, dst):
    if dst['out_edges'] == dst['counter']:
        src['counter'] += 1
        src['parent-cnt'] += dst['parent-cnt'] + 1
    return (src, edge, dst)

def find_cnt(g):
    start = time.time()
Example #15
import random
from copy import copy
import os
import graphlab.aggregate as agg
import array
import numpy as np
import sys

# Run this script in the same directory as the

train_path = "image-sframes/train-%d/"
valid_path = "image-sframes/validation-%d/"

# Change cache file directory to avoid overloading /var
my_graphlab_cache_file_locations = '/home/zak/tmp_graphlab'
gl.set_runtime_config('GRAPHLAB_CACHE_FILE_LOCATIONS',
                      my_graphlab_cache_file_locations)

X_data = gl.SFrame("image-sframes/train/")


def save_as_train_and_test(X, train_loc, valid_loc):

    # Can't just randomly sample the indices
    all_names = list(X["name"].unique())

    n_valid = (2 * len(all_names)) / 100

    random.shuffle(all_names)

    tr_names = gl.SArray(all_names[n_valid:])
    valid_names = gl.SArray(all_names[:n_valid])
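    # The function is truncated here. Splitting by name (so all rows sharing a name
    # land in the same split) would presumably use SFrame.filter_by and then save
    # each part -- a sketch:
    X.filter_by(tr_names, "name").save(train_loc)
    X.filter_by(valid_names, "name").save(valid_loc)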
Example #16
import numpy as np
import itertools
from collections import defaultdict
from datetime import datetime
import math
import sys
import os
from scipy.stats import percentileofscore
import graphlab as gl
import graphlab.aggregate as agg
gl.set_runtime_config('GRAPHLAB_CACHE_FILE_LOCATIONS', '/home/mraza/tmp/')
gl.set_runtime_config('GRAPHLAB_DEFAULT_NUM_PYLAMBDA_WORKERS', 48)

# X1,X2,X3,X4,X5,X6,X7
# 2015-10-01 12:08:41,1046885725705,1046910448494,GSM,ITN006,,1.5
# 2015-10-01 16:55:32,1046885725705,1046910448494,GSM,ITN010,,1.5
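# Given rows like the samples above, loading with header=False reproduces the
# X1..X7 column names listed in the first comment line. A sketch (the file name
# is hypothetical):
cdr = gl.SFrame.read_csv('cdr_records.csv', header=False)
cdr['X1'] = cdr['X1'].str_to_datetime('%Y-%m-%d %H:%M:%S')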


def distance(l1_lat, l1_lng, l2_lat, l2_lng):
    R = 6371
    # Radius of the earth in km
    d = 0.0
    try:
        l1_lat, l1_lng, l2_lat, l2_lng = float(l1_lat), float(l1_lng), float(
            l2_lat), float(l2_lng)
    except (TypeError, ValueError):
        l1_lat, l1_lng, l2_lat, l2_lng = 0.0, 0.0, 0.0, 0.0
    dLat = (l1_lat - l2_lat) * math.pi / 180
    dLon = (l1_lng - l2_lng) * math.pi / 180
    a = math.sin(dLat / 2) * math.sin(dLat / 2) + math.cos(
        (l1_lat) * math.pi / 180) * math.cos(
Example #17
    tic = time.time()

    if is_reachable:
        print("Vertex {} is reachable from vertex {} - Distance: {}".format(target_vertex, source_vertex, int(distance)))
    else:
        print("Vertex {} cannot be reached from vertex {} - Distance: {}".format(target_vertex, source_vertex, int(distance)))

    return "Total runtime: {} seconds".format(tic-toc)

if __name__ == '__main__':

    if len(sys.argv) == 1:
        print("Please add number of workers, dataset path, source vertex, target vertex, and max recursion depth as arguments when loading script")
        sys.exit()
    else:
        workers = int(sys.argv[1])
        path = sys.argv[2]
        source_vertex = long(sys.argv[3])
        target_vertex = long(sys.argv[4])
        # max_depth = int(sys.argv[5])
        # assert max_depth >= 1
    
    # Configure GraphLab to utilize a specific number of workers (cores)
    gl.set_runtime_config('GRAPHLAB_DEFAULT_NUM_PYLAMBDA_WORKERS', workers)

    r_job = gl.deploy.job.create(run_reachability_job, path_to_file = path, source_vertex=source_vertex, target_vertex=target_vertex)

    # Collect job status, result, and metrics
    print("Job status\n{}".format(r_job.get_status())) 
    print("Job results\n{}".format(r_job.get_results())) 
    print("Job metrics\n{}".format(r_job.get_metrics())) 
Example #18
# coding: utf-8

# # Predicting sentiment from product reviews
# 
# # Fire up GraphLab Create
# (See [Getting Started with SFrames](/notebooks/Week%201/Getting%20Started%20with%20SFrames.ipynb) for setup instructions)

# In[ ]:

import graphlab


# In[ ]:

# Limit number of worker processes. This preserves system memory, which prevents hosted notebooks from crashing.
graphlab.set_runtime_config('GRAPHLAB_DEFAULT_NUM_PYLAMBDA_WORKERS', 4)


# # Read some product review data
# 
# Loading reviews for a set of baby products. 

# In[ ]:

products = graphlab.SFrame('amazon_baby.gl/')


# # Let's explore this data together
# 
# Data includes the product name, the review text and the rating of the review. 
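# The notebook is truncated here; exploring the columns described above would
# presumably start with something like:

# In[ ]:

products.head()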
Example #19
import graphlab as gl
from graphlab.toolkits._main import ToolkitError
import numpy as np
import time
import datetime

gl.set_runtime_config('GRAPHLAB_DEFAULT_NUM_PYLAMBDA_WORKERS', 8)
gl.set_runtime_config('GRAPHLAB_DEFAULT_NUM_GRAPH_LAMBDA_WORKERS', 8)
#gl.set_runtime_config('GRAPHLAB_FILEIO_MAXIMUM_CACHE_CAPACITY',100000000000)
#gl.set_runtime_config('GRAPHLAB_FILEIO_MAXIMUM_CACHE_CAPACITY_PER_FILE',100000000000)

raw_docs = gl.SArray("LDA_vectors/")
vocab_idx = {}
for line in open('vocab_idx'):
    line = line.strip().split('\t')
    vocab_idx[int(line[1])] = line[0]

def formatter(row):
    row = eval(row)
    row = dict(zip([vocab_idx[term] for term in row[1][1]],row[1][2]))
    return row

docs = raw_docs.filter(lambda x: x!="").apply(formatter)
docs.save("doc_array",format='binary')

docs = gl.SArray("doc_array")

train,test = gl.SFrame(docs).random_split(0.9,seed=99)
train = gl.text_analytics.tf_idf(train['X1'])
test = gl.text_analytics.tf_idf(test['X1'])
#data = {'tfidf':(train_tfidf,test_tfidf),'tf':(train,test)}
Example #20
import numpy
from graphlab import feature_engineering as fe
from sklearn.metrics import roc_auc_score
from sklearn.preprocessing import MinMaxScaler
from sklearn import metrics

from util import Util
from evals import Eval
from spectral_training import SpectralTraining
DEBUG=1




if __name__=='__main__':
	gl.set_runtime_config('GRAPHLAB_CACHE_FILE_LOCATIONS','/home/mraza/tmp/')	
	gl.set_runtime_config('GRAPHLAB_FILEIO_MAXIMUM_CACHE_CAPACITY', 200*1024*1024*1024)
	gl.set_runtime_config('GRAPHLAB_FILEIO_MAXIMUM_CACHE_CAPACITY_PER_FILE', 100*1024*1024*1024)
	gl.set_runtime_config('GRAPHLAB_DEFAULT_NUM_PYLAMBDA_WORKERS', 20)

	parser=argparse.ArgumentParser(description='Spectral Features Preprocessing')
	parser.add_argument('-cf','--clusteral_features',help='Input File containing clusteral features', required=True)
	parser.add_argument('-lf','--labels_file', help='Ground Truth labels file', required=True)
	parser.add_argument('-of','--output_file', help='Output file', required=True)
	parser.add_argument('-cfk','--clusteral_key_column', required=True)
	parser.add_argument('-lfk','--labels_key_column', required=True)
	parser.add_argument('-lfv','--labels_value_column', required=True)
	parser.add_argument('-i','--interaction', required=True)
	parser.add_argument('-j','--join_type', required=False)
	parser.add_argument('-e','--encode', required=False)
	parser.add_argument('-ex','--exclude', required=False)
Example #21
# coding: utf-8

# # Fire up GraphLab Create
#
# We always start with this line before using any part of GraphLab Create. It can take up to 30 seconds to load the GraphLab library - be patient!
#
# The first time you use GraphLab Create, you must enter a product key to license the software for non-commercial academic use. To register for a free one-year academic license and obtain your key, go to [dato.com](https://dato.com/download/academic.html).

# In[2]:

import graphlab
# Set product key on this computer. After running this cell, you will not need to re-enter your product key.
graphlab.product_key.set_product_key('C7E4-BB1D-0150-A1E6-645C-66D9-D454-CC8D')

# Limit number of worker processes. This preserves system memory, which prevents hosted notebooks from crashing.
graphlab.set_runtime_config('GRAPHLAB_DEFAULT_NUM_PYLAMBDA_WORKERS', 4)

# Output active product key.
graphlab.product_key.get_product_key()

# # Load a tabular data set

# In[3]:

sf = graphlab.SFrame('people-example.csv')

# # SFrame basics

# In[6]:

sf.head()  # we can view first few lines of table
Example #22
import csv
import sys
import math
import numpy
from graphlab import feature_engineering as fe
from sklearn.metrics import roc_auc_score
from sklearn.preprocessing import MinMaxScaler
from sklearn import metrics

from util import Util
from evals import Eval
from spectral_training import SpectralTraining
DEBUG = 1

if __name__ == '__main__':
    gl.set_runtime_config('GRAPHLAB_CACHE_FILE_LOCATIONS', '/home/mraza/tmp/')
    gl.set_runtime_config('GRAPHLAB_FILEIO_MAXIMUM_CACHE_CAPACITY',
                          200 * 1024 * 1024 * 1024)
    gl.set_runtime_config('GRAPHLAB_FILEIO_MAXIMUM_CACHE_CAPACITY_PER_FILE',
                          100 * 1024 * 1024 * 1024)
    gl.set_runtime_config('GRAPHLAB_DEFAULT_NUM_PYLAMBDA_WORKERS', 20)

    parser = argparse.ArgumentParser(
        description='Spectral Features Preprocessing')
    parser.add_argument('-cf',
                        '--clusteral_features',
                        help='Input File containing clusteral features',
                        required=True)
    parser.add_argument('-lf',
                        '--labels_file',
                        help='Ground Truth labels file',
Example #23
#!/usr/bin/env python

import graphlab as gl
from sklearn.metrics import precision_recall_curve
import numpy as np
#import matplotlib.pyplot as plt
import argparse
import glob

gl.set_runtime_config('GRAPHLAB_CACHE_FILE_LOCATIONS','/scratch')
gl.set_runtime_config('GRAPHLAB_FILEIO_MAXIMUM_CACHE_CAPACITY',40000000000)
gl.set_runtime_config('GRAPHLAB_FILEIO_MAXIMUM_CACHE_CAPACITY_PER_FILE', 40000000000)
gl.set_runtime_config('GRAPHLAB_SFRAME_SORT_BUFFER_SIZE',40000000000)

parser = argparse.ArgumentParser(description='A script to get P-R curve for a model')
parser.add_argument('-d','--directory', help='directory to test', required=True )
parser.add_argument('-t','--test', help = 'test data matrix', required=True)
#parser.add_argument('-r,','--report',help= 'data for PR curve', required = True )
#parser.add_argument('-f,','--figure',help= 'figure name for plotting', required = True )

args = parser.parse_args()


#test =  gl.SFrame.read_csv('/global/projectb/scratch/arrivers/geneleanrntest/20150818/test.twoclass.txt', delimiter='\t', header=False)
#train =  gl.SFrame.read_csv('/global/projectb/scratch/arrivers/geneleanrntest/20150818/train.twoclass.txt', delimiter='\t', header=False)
#test.save('test_twoclass_sframe')
#train.save('train_twoclass_sframe')


test =  gl.SFrame.read_csv(args.test, delimiter='\t', header=False)
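# The snippet is truncated here. Judging from the imports, the remainder presumably
# loads a saved model from --directory and computes a precision-recall curve. A sketch;
# the probabilistic output type and the 'X1' target column are assumptions.
model = gl.load_model(args.directory)
probs = model.predict(test, output_type='probability')
precision, recall, thresholds = precision_recall_curve(
    np.array(list(test['X1'])), np.array(list(probs)))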
Example #24
    #rmses_cf = run_cf(min_lambduh, min_k, min_lambduh_w)
    #rmses_cf2 = run_cf2(min_lambduh, min_k, min_lambduh_w)


# In[64]:

def main(argv):
    # pylint: disable=W0612
    try:
        argv = FLAGS(argv)  # parse flags
    except gflags.FlagsError, e:
        print '%s\nUsage: %s ARGS\n%s' % (e, sys.argv[0], FLAGS)
        sys.exit(1)

    gl.set_runtime_config('GRAPHLAB_DEFAULT_NUM_GRAPH_LAMBDA_WORKERS', 16)
    for flag_name in sorted(FLAGS.RegisteredFlags()):
        if flag_name not in ["?", "help", "helpshort", "helpxml"]:
            fl = FLAGS.FlagDict()[flag_name]
            with open('output/main.out', 'a') as f:
                f.write(
                    "# " + fl.help + " (" + flag_name + "): " + str(fl.value) + '\n')

    X_train, X_test = load(FLAGS.dataset)
    g = get_graph(X_train, FLAGS.rank)

    rmse_train, rmse_test, L, R, wu, wm, bu, bm = \
        sgd_gl_edge(g, X_train, X_test,
                    FLAGS.lamb, FLAGS.rank, FLAGS.eta, Niter=FLAGS.maxit,
                    unified=FLAGS.unified, lambduh_w=FLAGS.lamb_w, output="main")