Example #1
 def load_uid_iid_data(self):
     pydev.info('Begin loading data..')
     # No need to load the full training set;
     # load only 10000 training examples as test_of_train.
     self.test_of_train, self.valid, self.test = utils.readdata(
         'data', test_num=10000)
     pydev.info('Load over')
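Examples #1, #2, and #4-#8 all call utils.readdata from the same recommender project. Its body never appears in these excerpts, but the call sites pin down the contract: it takes a data directory plus an optional test_num, and returns (train, valid, test), where each split iterates as (uid, items) pairs and each item unpacks into three fields with a 0/1 click flag in position 1 (see Example #6). A minimal stand-in consistent with that usage; the file names and field layout are assumptions:

# Hypothetical stand-in for utils.readdata, inferred from the call sites only;
# the real file format and field names are not shown in these examples.
def readdata(data_dir, test_num=-1):
    def parse(path):
        users = []
        with open(path) as f:
            for line in f:
                uid, *fields = line.rstrip('\n').split('\t')
                # assumed item layout: (item_id, click_flag, timestamp),
                # matching "a, _, _ = items[idx]" and "x[1] == 1" in Example #6
                items = [(fields[i], int(fields[i + 1]), fields[i + 2])
                         for i in range(0, len(fields), 3)]
                users.append((uid, items))
        return users

    train = parse(data_dir + '/train.txt')   # file names are assumptions
    valid = parse(data_dir + '/valid.txt')
    test = parse(data_dir + '/test.txt')
    if test_num > 0:
        train = train[:test_num]  # test_num appears to cap the training split
    return train, valid, test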
Example #2
    def __init__(self):
        pydev.App.__init__(self)
    
        self.debug = True

        TestNum = -1

        pydev.info('Begin loading data..')
        self.train, self.valid, self.test = utils.readdata('data', test_num=TestNum)
        pydev.info('Load over')
Example #3
def classifierfig(filt='g'):
    global N
    data = readdata(PLASTICC)
    newdata = prepdata(data, TMIN, TMAX)
    

    plots = [] #will host all data
    ids = [] #id to change data source
    nspecial = np.sum(~np.array([k.startswith('sn_')
                                 for k in newdata.keys()])) 
    print("special lightcurves", nspecial)
    #number of normal IIs
    nII = np.sum(np.array([k.startswith('sn_II_') for k in newdata.keys()]))

    for j, tmp0 in enumerate(r1):
     for k, tmp1 in enumerate(r2):
        i = k + j * len(r2)
        dts = (tmp0, tmp1)
        
        thisdir = "noplasticc/GPclassifier_%si"%filt + \
                  "/GPclassifier_%.1f_%.1f"%(dts[1], dts[0])
        tmp2 = getcdt(newdata, delta_t=dts[1],
                      color_t=dts[0], filt=filt)
        
        color = np.hstack([t[0] for t in tmp2[:nspecial]])
        shape = np.hstack([t[1] for t in tmp2[:nspecial]])
        # drop rows where either color or shape is NaN
        mask = np.isnan(color) | np.isnan(shape)
        color = color[~mask]
        shape = shape[~mask]

        #nsnIa = np.array([len(tmp2[i-2]) for i in range(4)]).sum()
        iicolor = np.hstack([t[0] for t in tmp2[nspecial:nspecial+nII]])
        iishape = np.hstack([t[1] for t in tmp2[nspecial:nspecial+nII]])
        # drop rows where either color or shape is NaN
        mask = np.isnan(iicolor) | np.isnan(iishape)
        iicolor = iicolor[~mask]
        iishape = iishape[~mask]
        nii = iicolor.shape[0]

        iacolor = np.hstack([t[0] for t in tmp2[nspecial+nII:]])
        iashape = np.hstack([t[1] for t in tmp2[nspecial+nII:]])
        # drop rows where either color or shape is NaN
        mask = np.isnan(iacolor) | np.isnan(iashape)
        iacolor = iacolor[~mask]
        iashape = iashape[~mask]        
        nia = iacolor.shape[0]        
        np.random.seed(SEED)
        N = color.shape[0]
        
        # pad with "boring" transients: ~2/3 Ia and ~1/3 II (matching the sizes below)
        randIa = np.random.randint(0, len(iacolor), int(2 * N / 3))
        randII = np.random.randint(0, len(iicolor), int(N / 3) + 1)

        plotcolor = ['IndianRed'] * N + ['c'] * N
        label = np.hstack([np.zeros(N), np.ones(N)])
        #print(N, len(label))
        
        color = list(np.hstack([color, iicolor[randII], iacolor[randIa]]))
        shape = list(np.hstack([shape, iishape[randII], iashape[randIa]]))
        # pick a TRAINING fraction
        plots.append({'x':shape, 'y':color,
                      'idt':"%.1f-%.1f"%(dts[0], dts[1])})
        
        #plots['shape'] = plots['shape'] + shape
        #plots['c'] = plots['c'] + ([dts[0]] * len(color))
        #plots['s'] = plots['s'] + ([dts[1]] * len(color))  
        ids.append("%.1f-%.1f"%(dts[0], dts[1]))
        #alldata.append()
        phasespace_complete = np.array([shape, color]).T
    return plots, ids, N
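classifierfig leans on several module-level globals (r1, r2, TMIN, TMAX, SEED, PLASTICC, plus N, which it also returns) that sit outside this snippet. A minimal usage sketch of its return value:

# Minimal usage sketch; r1, r2, TMIN, TMAX, SEED, and PLASTICC are
# module-level globals defined elsewhere in the original file.
plots, ids, N = classifierfig(filt='g')
for p in plots:
    print(p['idt'], len(p['x']), len(p['y']))  # one scatter set per (color_t, delta_t) grid point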
Example #4
                y.append(random.choice(range(self.movie_count)))
                clicks.append(0)

        return torch.tensor(x), torch.tensor(y), torch.tensor(clicks)


if __name__ == '__main__':
    if len(sys.argv) != 3:
        print('Usage:\ndnn.py <datadir> <model>', file=sys.stderr)
        sys.exit(-1)

    data_dir = sys.argv[1]
    model_save_path = sys.argv[2]

    EmbeddingSize = 128
    train, valid, test = utils.readdata(data_dir, test_num=1000)

    data = DataLoader(train)
    data.set_batch_size(100)

    model = FCDNN(data.movie_count, EmbeddingSize)
    optimizer = optim.SGD(model.parameters(), lr=0.1)
    loss_fn = nn.CrossEntropyLoss()

    def fwbp():
        x, y, clicks = data.next_iter()
        batch_count = len(x)
        #print(x, y, clicks)
        clicks_ = model.forward(x, y)
        loss = loss_fn(clicks_, clicks)
        loss.backward()
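fwbp only runs the forward and backward pass, so the optimizer step has to happen in an outer loop. A loop consistent with that split; the iteration count and the final save are assumptions (torch.save of a state_dict is standard PyTorch):

# Sketch of the surrounding training loop; the iteration count is an assumption.
for it in range(1000):
    optimizer.zero_grad()  # clear gradients before fwbp() calls loss.backward()
    fwbp()                 # forward + backward pass as defined above
    optimizer.step()       # apply the SGD update (lr=0.1)
torch.save(model.state_dict(), model_save_path)  # persist to <model> from argv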
Example #5
    data_dir = autoarg.option('data', 'data/')
    model_save_path = autoarg.option('output', 'temp/dnn.pkl')

    TestNum = int(autoarg.option('testnum', -1))
    EmbeddingSize = int(autoarg.option('embed', 16))
    EpochCount = int(autoarg.option('epoch', 3))
    BatchSize = int(autoarg.option('batch', 1024))
    device_name = autoarg.option('device', 'cuda')

    pydev.info('EmbeddingSize=%d' % EmbeddingSize)
    pydev.info('Epoch=%d' % EpochCount)
    pydev.info('BatchSize=%d' % BatchSize)

    device = torch.device(device_name)

    train, valid, test = utils.readdata(data_dir, test_num=TestNum)
    data = DataGenerator(train, device, epoch_count=EpochCount, batch_size=BatchSize)

    model = DNNRank(data.user_count, data.movie_count, EmbeddingSize).to(device)
    #optimizer = optim.SGD(model.parameters(), lr=0.005)
    optimizer = optim.Adam(model.parameters(), lr=0.01)
    loss_fn = nn.BCELoss()
    
    generator = data.data_generator()

    def test_validation():
        y = []
        y_ = []

        batch_size = 2048
        for begin in range(0, len(valid)-1, batch_size):
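test_validation is truncated right after its loop header, which walks valid in strides of 2048. A continuation consistent with that header; the per-row layout of valid and the model's call signature are assumptions, not shown above:

# Hypothetical loop body; the field layout of `valid` and the call
# signature of DNNRank are assumptions.
for begin in range(0, len(valid) - 1, batch_size):
    batch = valid[begin:begin + batch_size]
    users = torch.tensor([b[0] for b in batch], device=device)
    movies = torch.tensor([b[1] for b in batch], device=device)
    with torch.no_grad():
        scores = model(users, movies)    # BCE-style scores in [0, 1]
    y += [b[2] for b in batch]           # ground-truth click labels
    y_ += scores.cpu().tolist()          # predicted probabilities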
Example #6
    def add(self, a, b):
        if a not in self.cooc_dict:
            self.cooc_dict[a] = {}
        self.cooc_dict[a][b] = self.cooc_dict[a].get(b, 0) + 1
        self.total_edge += 1

if __name__ == '__main__':
    MinCooc = 0
    TestNum = -1
    if len(sys.argv)>1:
        TestNum = int(sys.argv[1])
    WindowSize = 5

    print('begin loading data..(testnum=%d)' % TestNum, file=sys.stderr)
    train, _, _ = utils.readdata('data', test_num=TestNum)
    print('load over', file=sys.stderr)

    cooc_dict = CoocDict()
    for uid, items in train:
        items = list(filter(lambda x: x[1] == 1, items))  # list() so len() and indexing work on Python 3

        for idx in range(len(items)-WindowSize):
            a, _,_ = items[idx]
            for offset in range(WindowSize):
                b, _,_ = items[idx + 1 + offset]
                cooc_dict.add(a, b)
                cooc_dict.add(b, a)

    print('Total cooc: %d' % cooc_dict.total_edge, file=sys.stderr)
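MinCooc is set at the top but never used in the visible code; it presumably thresholds which co-occurrence pairs get emitted. A dump step consistent with the CoocDict structure above, with the output format assumed:

# Hypothetical dump step; the real output format is not shown.
for a, neighbors in cooc_dict.cooc_dict.items():
    for b, count in neighbors.items():
        if count >= MinCooc:
            print('%s\t%s\t%d' % (a, b, count))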
Example #7
                continue
            ret.append(str(item))
            if len(ret) >= topN:
                return ret
        return ret

    utils.measure(predict, test, debug=False)


if __name__ == '__main__':
    TopN = 10
    TestNum = -1
    #TestNum = 100

    print('begin loading data..', file=sys.stderr)
    train, valid, test = utils.readdata('data', test_num=TestNum)
    print('load over', file=sys.stderr)

    print('Algor: Embeddings', file=sys.stderr)
    algor_embeddings(train, valid, test, TopN)
    '''
    print('Algor: Hot', file=sys.stderr)
    algor_hot(train, valid, test, TopN)

    print('Algor: Cooc', file=sys.stderr)
    algor_cooc(train, valid, test, TopN)

    print('Algor: CoocOnly_1', file=sys.stderr)
    algor_cooc(train, valid, test, TopN, only1=True)
    '''
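utils.measure is not shown; from this call site it takes a predict callable, the test split, and a debug flag. A plausible recall-style implementation under the (uid, items) layout assumed in the sketch after Example #1; whether predict receives a uid or the user's history is not visible, so a uid is assumed:

# Hypothetical sketch of utils.measure; the real metric is not shown.
import sys

def measure(predict, test, debug=False):
    hit, total = 0, 0
    for uid, items in test:
        truth = set(str(item[0]) for item in items if item[1] == 1)
        recs = set(predict(uid))  # predict returns up to TopN item-id strings
        hit += len(truth & recs)
        total += len(truth)
        if debug:
            print(uid, truth, recs, file=sys.stderr)
    print('recall: %.4f' % (hit / max(total, 1)), file=sys.stderr)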
Example #8
            del clicks


if __name__ == '__main__':
    if len(sys.argv) != 3:
        print('Usage:\ndnn.py <datadir> <model>', file=sys.stderr)
        sys.exit(-1)

    device = torch.device('cuda')

    data_dir = sys.argv[1]
    model_save_path = sys.argv[2]

    EmbeddingSize = 32

    train, valid, test = utils.readdata(data_dir)

    data = DataLoader(train, device)
    del train

    model = FC_DNN(data.movie_count, EmbeddingSize).to(device)
    #optimizer = optim.SGD(model.parameters(), lr=0.005)
    optimizer = optim.Adam(model.parameters(), lr=0.01)
    loss_fn = nn.BCELoss()

    generator = data.data_generator()

    test_y = []
    test_y_ = []

    class Trainer:
Example #9
tf.app.flags.DEFINE_string("loss_type", 'log_loss',
                           "loss type {square_loss, log_loss}")
tf.app.flags.DEFINE_integer("log_steps", 1000, "save summary every steps")
tf.app.flags.DEFINE_boolean("batch_norm", False,
                            "perform batch normalization (True or False)")
tf.app.flags.DEFINE_float(
    "batch_norm_decay", 0.9,
    "decay for the moving average(recommend trying decay=0.9)")

if FLAGS.dt_dir == "":
    FLAGS.dt_dir = (date.today() + timedelta(1 - 1)).strftime('%Y%m%d')
FLAGS.model_dir = FLAGS.model_dir + FLAGS.dt_dir

if FLAGS.train:
    data = readdata(FLAGS.data_dir, FLAGS.feature_size)
    random.shuffle(data)
    train_data = data[:int(len(data) * FLAGS.split_ratio)]
    test_data = data[int(len(data) * FLAGS.split_ratio):]
    print('read finish')
    print('train data size:', (len(train_data), len(train_data[0][0])))
    print('test data size:', (len(test_data), len(test_data[0][0])))
    train_size = len(train_data)
    test_size = len(test_data)
    min_round = 1
    num_round = 20
    early_stop_round = 5
    batch_size = FLAGS.batch_size

deep_ffm_params = {
    'train': FLAGS.train,
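The snippet reads FLAGS fields (train, data_dir, model_dir, dt_dir, feature_size, batch_size, split_ratio) whose DEFINE calls fall outside the excerpt. In the same TF1 tf.app.flags idiom they would be declared roughly as follows; all defaults here are placeholders:

# Assumed declarations for the FLAGS used above; defaults are placeholders.
FLAGS = tf.app.flags.FLAGS
tf.app.flags.DEFINE_boolean("train", True, "train (True) or evaluate (False)")
tf.app.flags.DEFINE_string("data_dir", "./data", "training data directory")
tf.app.flags.DEFINE_string("model_dir", "./model/", "checkpoint directory")
tf.app.flags.DEFINE_string("dt_dir", "", "date partition, YYYYMMDD")
tf.app.flags.DEFINE_integer("feature_size", 0, "number of input features")
tf.app.flags.DEFINE_integer("batch_size", 256, "minibatch size")
tf.app.flags.DEFINE_float("split_ratio", 0.8, "train/test split fraction")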
Example #10
from scipy import stats
import math
import sys
import numpy as np  # np is used below but was not imported in the original excerpt
from sklearn.linear_model import BayesianRidge, Ridge, Lasso, RidgeCV, LogisticRegression
from sklearn.model_selection import cross_val_score, KFold  # sklearn.cross_validation was removed
#from sklearn.preprocessing import PolynomialFeatures
#from sklearn.kernel_ridge import KernelRidge
#from sklearn.feature_selection import RFECV
import joblib  # sklearn.externals.joblib was removed in modern scikit-learn
from utils import readdata
from sklearn import preprocessing

print("Running classification....")

X, y = readdata(True, sys.argv[1])
Xtest, ytest = readdata(True, sys.argv[2])

X = np.array(X, dtype=float)

le = preprocessing.LabelEncoder()
le.fit(y + ytest)

y = np.array(le.transform(y), dtype=int)
ytest = le.transform(ytest)

total_labels = float(sys.argv[4])
numlabels = len(np.unique(y))  # could also use total_labels
numlabelsindata = len(le.classes_)
#print X,y
bestalpha = -1
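bestalpha = -1 suggests the truncated remainder sweeps regularization strengths. A cross-validated sweep consistent with the imports above; the alpha grid, the choice of LogisticRegression, and the scoring are assumptions:

# Hypothetical alpha sweep; the grid and scoring are assumptions.
bestscore = -np.inf
for alpha in [0.01, 0.1, 1.0, 10.0, 100.0]:
    clf = LogisticRegression(C=1.0 / alpha)  # C is inverse regularization strength
    score = cross_val_score(clf, X, y, cv=KFold(n_splits=5)).mean()
    if score > bestscore:
        bestscore, bestalpha = score, alpha
print("best alpha:", bestalpha)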
Example #11
def test(ct=0):
    data = readdata()
    wig = WIG(data, prune_topk=ct, epochs=1, min_count=1)
    # wig.train()
    df = wig.generateindex(compare=True)
    testplot(df, ct)
Example #12
    for k in datain.keys():
        returnvalue.append(Calculate_ColorDelta(datain[k]['g'],
                                                datain[k]['i'],
                                                delta_t,
                                                color_t))
    return returnvalue
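This loop matches how Example #3 consumes getcdt: each list element is unpacked as t[0] (color) and t[1] (shape), so Calculate_ColorDelta evidently returns a (color, shape) pair per object, and the enclosing function here is getcdt or a close relative of it. A consumption sketch, with the function name and argument values assumed:

# Usage sketch based on Example #3; delta_t/color_t values are assumptions.
import numpy as np
pairs = getcdt(newdata, delta_t=1.0, color_t=0.5, filt='g')
colors = np.hstack([t[0] for t in pairs])  # t[0]: color measurements
shapes = np.hstack([t[1] for t in pairs])  # t[1]: shape measurements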


dt1 = [4]#[1.5, 0.5, 3.5, 4.5, 6.5]#[0.5,1,1.5,2.5,3.5,4.5,5.5,6.5] #gap between obs in same filter
dt2 = [0]#[0.5, 0, 1, 2] #gap between filters
from utils import readdata
data = readdata(False)

#print(data['GWr'])
newdata = {}

for i,did in enumerate(data):
    if did == 'annotations':
        continue

    d = data[did]
    # keep observations between -21 and +15 days (time-rel appears to be in hours);
    # the original assigned indices twice, so the first cut was silently discarded
    indices = (d['time-rel'] < 15 * 24) & (d['time-rel'] > -21 * 24)
    newdata[did] = d[indices]

    #pl.plot(d[d['time-rel'])