def load_uid_iid_data(self):
    """Load the evaluation splits onto the instance.

    Populates self.test_of_train, self.valid and self.test from
    utils.readdata. The full training set is deliberately not loaded;
    a 10000-record slice stands in as a "test of train" split.
    """
    pydev.info('Begin loading data..')
    # no need to load train — only 10000 train records as test_of_train
    splits = utils.readdata('data', test_num=10000)
    self.test_of_train, self.valid, self.test = splits
    pydev.info('Load over')
def __init__(self):
    """Initialise the app and load train/valid/test splits.

    test_num=-1 is forwarded unchanged to utils.readdata
    (presumably meaning "load everything" — confirm in utils).
    """
    pydev.App.__init__(self)
    self.debug = True
    test_count = -1
    pydev.info('Begin loading data..')
    loaded = utils.readdata('data', test_num=test_count)
    self.train, self.valid, self.test = loaded
    pydev.info('Load over')
def classifierfig(filt='g'):
    """Build color/shape scatter data over the (color_t, delta_t) grid.

    For every pair drawn from the module-level grids r1 x r2, extracts
    color/shape statistics via getcdt and mixes the "special" transients
    with an equally sized background sample of type II and Ia SNe.

    filt: photometric band forwarded to getcdt (default 'g').

    Returns (plots, ids, N): plots is a list of dicts with keys
    'x', 'y', 'idt'; ids the matching "%.1f-%.1f" labels; N (also a
    module-level global) the special-transient count of the last cell.
    """
    global N
    data = readdata(PLASTICC)
    newdata = prepdata(data, TMIN, TMAX)
    plots = []  # will host all data
    ids = []    # id to change data source
    # keys not starting with 'sn_' are the "special" light curves
    nspecial = np.sum(~np.array([k.startswith('sn_') for k in newdata.keys()]))
    print("special lightcurves", nspecial)
    # number of normal type-II SNe
    nII = np.sum(np.array([k.startswith('sn_II_') for k in newdata.keys()]))
    for j, tmp0 in enumerate(r1):
        for k, tmp1 in enumerate(r2):
            dts = (tmp0, tmp1)
            tmp2 = getcdt(newdata, delta_t=dts[1], color_t=dts[0], filt=filt)
            # --- special transients ---
            color = np.hstack([t[0] for t in tmp2[:nspecial]])
            shape = np.hstack([t[1] for t in tmp2[:nspecial]])
            # BUGFIX: drop a point when EITHER value is NaN. The original
            # used `*` (logical AND), which kept half-NaN rows; the Ia
            # branch below already used `|`.
            mask = np.isnan(color) | np.isnan(shape)
            color = color[~mask]
            shape = shape[~mask]
            # --- type II SNe ---
            iicolor = np.hstack([t[0] for t in tmp2[nspecial:nspecial + nII]])
            iishape = np.hstack([t[1] for t in tmp2[nspecial:nspecial + nII]])
            mask = np.isnan(iicolor) | np.isnan(iishape)  # BUGFIX: was `*`
            iicolor = iicolor[~mask]
            iishape = iishape[~mask]
            # --- type Ia SNe (everything after the IIs) ---
            iacolor = np.hstack([t[0] for t in tmp2[nspecial + nII:]])
            iashape = np.hstack([t[1] for t in tmp2[nspecial + nII:]])
            mask = (np.isnan(iacolor) | np.isnan(iashape))
            iacolor = iacolor[~mask]
            iashape = iashape[~mask]
            np.random.seed(SEED)
            N = color.shape[0]
            # pick boring transients: ~1/3 II and ~2/3 Ia, matching the
            # special-sample size N
            randIa = np.random.randint(0, len(iacolor), int(2 * N / 3))
            randII = np.random.randint(0, len(iicolor), int(N / 3) + 1)
            color = list(np.hstack([color, iicolor[randII], iacolor[randIa]]))
            shape = list(np.hstack([shape, iishape[randII], iashape[randIa]]))
            plots.append({'x': shape, 'y': color,
                          'idt': "%.1f-%.1f" % (dts[0], dts[1])})
            ids.append("%.1f-%.1f" % (dts[0], dts[1]))
    return plots, ids, N
        # (fragment) tail of a batch-builder method: pad with a random
        # movie id and a 0 click label (presumably a negative sample —
        # confirm against the full method).
        # NOTE(review): source was collapsed to one line; indentation inferred.
        y.append(random.choice(range(self.movie_count)))
        clicks.append(0)
        return torch.tensor(x), torch.tensor(y), torch.tensor(clicks)


if __name__ == '__main__':
    # Python 2 script (uses `print >>` syntax).
    if len(sys.argv) != 3:
        print >> sys.stderr, 'Usage:\ndnn.py <datadir> <model>'
        sys.exit(-1)
    data_dir = sys.argv[1]
    model_save_path = sys.argv[2]
    EmbeddingSize = 128
    # hold out 1000 records as the test split
    train, valid, test = utils.readdata(data_dir, test_num=1000)
    data = DataLoader(train)
    data.set_batch_size(100)
    model = FCDNN(data.movie_count, EmbeddingSize)
    optimizer = optim.SGD(model.parameters(), lr=0.1)
    loss_fn = nn.CrossEntropyLoss()

    def fwbp():
        # One forward/backward pass over the next mini-batch.
        # NOTE(review): body may continue beyond this chunk.
        x, y, clicks = data.next_iter()
        batch_count = len(x)
        #print x, y, clicks
        clicks_ = model.forward(x, y)
        loss = loss_fn(clicks_, clicks)
        loss.backward()
# Command-line options (autoarg): data location, model output path, and
# training hyper-parameters.
data_dir = autoarg.option('data', 'data/')
model_save_path = autoarg.option('output', 'temp/dnn.pkl')
TestNum = int(autoarg.option('testnum', -1))  # -1 presumably = no limit — confirm in utils.readdata
EmbeddingSize = int(autoarg.option('embed', 16))
EpochCount = int(autoarg.option('epoch', 3))
BatchSize = int(autoarg.option('batch', 1024))
device_name = autoarg.option('device', 'cuda')

pydev.info('EmbeddingSize=%d' % EmbeddingSize)
pydev.info('Epoch=%d' % EpochCount)
pydev.info('BatchSize=%d' % BatchSize)

device = torch.device(device_name)
train, valid, test = utils.readdata(data_dir, test_num=TestNum)
data = DataGenerator(train, device, epoch_count=EpochCount, batch_size=BatchSize)
model = DNNRank(data.user_count, data.movie_count, EmbeddingSize).to(device)
#optimizer = optim.SGD(model.parameters(), lr=0.005)
optimizer = optim.Adam(model.parameters(), lr=0.01)
loss_fn = nn.BCELoss()  # BCE => binary target (presumably click / no-click)
generator = data.data_generator()

def test_validation():
    # Score the validation split in mini-batches of 2048.
    # NOTE(review): the loop body continues beyond this chunk.
    y = []
    y_ = []
    batch_size = 2048
    for begin in range(0, len(valid)-1, batch_size):
    def add(self, a, b):
        # Record one directed co-occurrence edge a -> b.
        # Lazily creates the inner dict; total_edge counts every call.
        if a not in self.cooc_dict:
            self.cooc_dict[a] = {}
        self.cooc_dict[a][b] = self.cooc_dict[a].get(b, 0) + 1
        self.total_edge += 1


if __name__=='__main__':
    # Python 2 script: builds a co-occurrence dictionary over items with
    # flag==1 inside a sliding window of each user's item sequence.
    MinCooc = 0  # NOTE(review): unused within this chunk
    TestNum = -1
    if len(sys.argv)>1:
        TestNum = int(sys.argv[1])
    WindowSize = 5
    print >> sys.stderr, 'begin loading data..(testnum=%d)' % TestNum
    train, _, _ = utils.readdata('data', test_num=TestNum)
    print >> sys.stderr, 'load over'
    cooc_dict = CoocDict()
    for uid, items in train:
        # keep only items whose second field == 1 (presumably clicks — confirm)
        # NOTE(review): relies on Python 2 `filter` returning a list
        # (len()/indexing below would fail on a Py3 iterator).
        items = filter(lambda x:x[1]==1, items)
        for idx in range(len(items)-WindowSize):
            a, _,_ = items[idx]
            for offset in range(WindowSize):
                b, _,_ = items[idx + 1 + offset]
                # both directions => symmetric co-occurrence counts
                cooc_dict.add(a, b)
                cooc_dict.add(b, a)
    print >> sys.stderr, 'Total cooc: %d' % (cooc_dict.total_edge)
                # (fragment) tail of a nested predict helper: collect up to
                # topN item ids, stringified.
                # NOTE(review): collapsed source; indentation inferred.
                continue
            ret.append(str(item))
            if len(ret) >= topN:
                return ret
        return ret

    # evaluate the predictor over the test split
    utils.measure(predict, test, debug=False)


if __name__ == '__main__':
    # Python 2 driver: runs only the Embeddings algorithm; the other
    # algorithms are parked in the commented-out block below.
    TopN = 10
    TestNum = -1
    #TestNum = 100
    print >> sys.stderr, 'begin loading data..'
    train, valid, test = utils.readdata('data', test_num=TestNum)
    print >> sys.stderr, 'load over'
    print >> sys.stderr, 'Algor: Embeddings'
    algor_embeddings(train, valid, test, TopN)
    '''
    print >> sys.stderr, 'Algor: Hot'
    algor_hot(train, valid, test, TopN)
    print >> sys.stderr, 'Algor: Cooc'
    algor_cooc(train, valid, test, TopN)
    print >> sys.stderr, 'Algor: CoocOnly_1'
    algor_cooc(train, valid, test, TopN, only1=True)
    '''
    '''
    # (fragment) end of a function above; frees a batch tensor.
    del clicks


if __name__ == '__main__':
    # Python 2 training driver (uses `print >>`); hard-codes CUDA.
    if len(sys.argv) != 3:
        print >> sys.stderr, 'Usage:\ndnn.py <datadir> <model>'
        sys.exit(-1)
    device = torch.device('cuda')
    data_dir = sys.argv[1]
    model_save_path = sys.argv[2]
    EmbeddingSize = 32
    train, valid, test = utils.readdata(data_dir)
    data = DataLoader(train, device)
    del train  # release the raw training list once the loader holds it
    model = FC_DNN(data.movie_count, EmbeddingSize).to(device)
    #optimizer = optim.SGD(model.parameters(), lr=0.005)
    optimizer = optim.Adam(model.parameters(), lr=0.01)
    loss_fn = nn.BCELoss()
    generator = data.data_generator()
    test_y = []
    test_y_ = []


class Trainer:
    # NOTE(review): class body continues beyond this chunk.
# TensorFlow (tf.app.flags, TF1-era API) flag definitions plus the
# train/test split performed when --train is set.
tf.app.flags.DEFINE_string("loss_type", 'log_loss', "loss type {square_loss, log_loss}")
tf.app.flags.DEFINE_integer("log_steps", 1000, "save summary every steps")
tf.app.flags.DEFINE_boolean("batch_norm", False, "perform batch normaization (True or False)")
tf.app.flags.DEFINE_float(
    "batch_norm_decay", 0.9,
    "decay for the moving average(recommend trying decay=0.9)")

if FLAGS.dt_dir == "":
    # default date tag is today: timedelta(1 - 1) == 0 days
    FLAGS.dt_dir = (date.today() + timedelta(1 - 1)).strftime('%Y%m%d')
FLAGS.model_dir = FLAGS.model_dir + FLAGS.dt_dir

if FLAGS.train:
    data = readdata(FLAGS.data_dir, FLAGS.feature_size)
    random.shuffle(data)
    # first split_ratio fraction -> train, remainder -> test
    train_data = data[:(int)(len(data) * FLAGS.split_ratio)]
    test_data = data[(int)(len(data) * FLAGS.split_ratio):]
    print('read finish')
    print('train data size:', (len(train_data), len(train_data[0][0])))
    print('test data size:', (len(test_data), len(test_data[0][0])))
    train_size = len(train_data)
    test_size = len(test_data)
    min_round = 1
    num_round = 20
    early_stop_round = 5
    batch_size = FLAGS.batch_size
    deep_ffm_params = {
        'train': FLAGS.train,
# Python 2 classification script. Uses scikit-learn APIs that were
# removed in modern releases (sklearn.cross_validation,
# sklearn.externals.joblib) — NOTE(review): pin an old sklearn or port.
from scipy import stats
import math
import sys
from sklearn.linear_model import BayesianRidge, Ridge, Lasso, RidgeCV, LogisticRegression
from sklearn import cross_validation
#from sklearn.preprocessing import PolynomialFeatures
#from sklearn.kernel_ridge import KernelRidge
#from sklearn.feature_selection import RFECV
from sklearn.externals import joblib
from sklearn.cross_validation import KFold
from utils import readdata
from sklearn import preprocessing

print "Running classification...."
# argv[1]: training input, argv[2]: test input (both via utils.readdata)
X, y = readdata(True, sys.argv[1])
Xtest, ytest = readdata(True, sys.argv[2])
X = np.array(X, dtype=float)
# encode labels to ints over the union of train+test label sets
le = preprocessing.LabelEncoder()
le.fit(y + ytest)
y = np.array(le.transform(y), dtype=int)
ytest = le.transform(ytest)
total_labels = float(sys.argv[4])
numlabels = len(np.unique(y)) #total_labels #
numlabelsindata = len(le.classes_)
#print X,y
bestalpha = -1
def test(ct=0):
    """Build a WIG index from freshly loaded data and plot it.

    ct: prune_topk value forwarded to WIG (and the plot label).
    Runs a single epoch with min_count=1; training itself is skipped.
    """
    corpus = readdata()
    model = WIG(corpus, prune_topk=ct, epochs=1, min_count=1)
    # model.train()  -- intentionally left disabled, as in the original
    index_df = model.generateindex(compare=True)
    testplot(index_df, ct)
for k in datain.keys(): returnvalue.append(Calculate_ColorDelta(datain[k]['g'], datain[k]['i'], delta_t, color_t)) return returnvalue dt1 = [4]#[1.5, 0.5, 3.5, 4.5, 6.5]#[0.5,1,1.5,2.5,3.5,4.5,5.5,6.5] #gap between obs in same filter dt2 = [0]#[0.5, 0, 1, 2] #gap between filters from utils import readdata data = readdata(False) #print(data['GWr']) newdata = {} for i,did in enumerate(data): if did == 'annotations': continue #[:-2]): d = data[did] indices = d['time-rel'] < 15*24 indices = d['time-rel'] > -21*24 newdata[did] = d[indices] #pl.plot(d[d['time-rel'])