def test(log_file):
    # n_estimators and learning_rate are taken from module scope
    of = open(log_file, "w")
    of.write("\t\t\tAUC\t\tPrecise\t\tRecall\t\tF1\n")

    # Alternative without the SVC base estimator:
    # model = AdaBoostClassifier(n_estimators=n_estimators, learning_rate=learning_rate)
    model = AdaBoostClassifier(base_estimator=SVC(10, probability=True),
                               n_estimators=n_estimators, learning_rate=learning_rate)
    X, app_Y = get_data("../data/orange_aft_clean.csv", attr="appetency", is_balance=True)
    auc, pre, rec, f1 = train_and_validation(X, app_Y, model)
    of.write("App\t\t%g\t\t%g\t\t%g\t\t%g\n" % (auc, pre, rec, f1))

    model = AdaBoostClassifier(base_estimator=SVC(10, probability=True),
                               n_estimators=n_estimators, learning_rate=learning_rate)
    X, churn_Y = get_data("../data/orange_aft_clean.csv", attr="churn", is_balance=True)
    auc, pre, rec, f1 = train_and_validation(X, churn_Y, model)
    of.write("Churn\t\t%g\t\t%g\t\t%g\t\t%g\n" % (auc, pre, rec, f1))

    model = AdaBoostClassifier(base_estimator=SVC(10, probability=True),
                               n_estimators=n_estimators, learning_rate=learning_rate)
    X, up_Y = get_data("../data/orange_aft_clean.csv", attr="upselling", is_balance=True)
    auc, pre, rec, f1 = train_and_validation(X, up_Y, model)
    of.write("Up\t\t%g\t\t%g\t\t%g\t\t%g\n" % (auc, pre, rec, f1))
    of.close()
def generate_quiz(request, friend_id):
    time_start = time()
    friend_data = get_data(request, friend_id, FRIEND_FIELDS)
    time_friend_data = time()
    self_data = get_data(request, 'me', SELF_FIELDS)
    time_self_data = time()
    logger.debug("TIME: friend_data fetch: %sms", round(1000 * (time_friend_data - time_start)))
    logger.debug("TIME: self_data fetch: %sms", round(1000 * (time_self_data - time_friend_data)))
    questions = get_questions(self_data, friend_data)
    answers = [question.correct_index for question in questions]
    request.session['answers'] = answers
    request.session['questions'] = [jsonpickle.encode(question) for question in questions]
    context = RequestContext(request, {
        'request': request,
        'questions': questions,
        'answers': answers,
        'profile_pic': request.session['profile_pic'],
        'profile_url': request.session['profile_url'],
    })
    logger.debug("TIME: all preprocessing: %sms", round(1000 * (time() - time_start)))
    return render_to_response('quiz.html', context_instance=context)
def test_memoize(self):
    utils.get_data()
    self.assertIn('expire', utils.CACHE['get_data'])
    self.assertIn('data', utils.CACHE['get_data'])
    utils.CACHE.clear()
    utils.get_data()
    self.assertIn('expire', utils.CACHE['get_data'])
    self.assertIn('data', utils.CACHE['get_data'])
def compare(request, p1):
    compare_form = CompareForm(request.GET if request.GET.get('submit', None) else None)
    context = {'compare_form': compare_form, 'person1': p1}
    if compare_form.is_valid():
        p2 = compare_form.cleaned_data['compare_to']
        p1oked, p1vetoed = utils.get_data(p1)
        p2oked, p2vetoed = utils.get_data(p2)
        context['person2'] = p2
        context['p1oked'] = utils.sort_nameset(p1oked - p2oked)
        context['p2oked'] = utils.sort_nameset(p2oked - p1oked)
        context['intersection'] = utils.sort_nameset(p2oked.intersection(p1oked))
    return HttpResponse(loader.get_template("names/compare.html").render(RequestContext(request, context)))
def index(request, form=None):
    if not form:
        form, user = auth_user(request)
    else:
        user = None
    if not user:
        content = utils.get_data('no_auth_index_content.%s.html' % request.LANGUAGE_CODE)
    elif user.role == 'admin':
        content = utils.get_data('admin_index_content.%s.html' % request.LANGUAGE_CODE)
    else:
        content = utils.get_data('user_index_content.%s.html' % request.LANGUAGE_CODE)
    return render_to_response('index.html', {'form': form, 'user': user, 'content': content})
def fine_tuning_step(n_estimators, max_features, max_depth):
    model = RandomForestClassifier(n_estimators=n_estimators, max_features=max_features, max_depth=max_depth)
    X, app_Y = get_data("../data/orange_aft_clean.csv", attr="appetency", is_balance=True)
    app_auc, pre, rec, f1 = train_and_validation(X, app_Y, model)

    model = RandomForestClassifier(n_estimators=n_estimators, max_features=max_features, max_depth=max_depth)
    X, churn_Y = get_data("../data/orange_aft_clean.csv", attr="churn", is_balance=True)
    churn_auc, pre, rec, f1 = train_and_validation(X, churn_Y, model)

    model = RandomForestClassifier(n_estimators=n_estimators, max_features=max_features, max_depth=max_depth)
    X, up_Y = get_data("../data/orange_aft_clean.csv", attr="upselling", is_balance=True)
    up_auc, pre, rec, f1 = train_and_validation(X, up_Y, model)

    return app_auc, churn_auc, up_auc
def test_cache(self):
    """
    Test cache decorator for get_data function.
    """
    CACHE = utils.CACHE
    self.assertEqual(CACHE, {})
    data = utils.get_data()
    self.assertNotEqual(CACHE, {})
    self.assertEqual(CACHE['get_data']['data'], data)
    cache_time = CACHE['get_data']['time']
    CACHE['get_data']['time'] = time() + 86400
    utils.get_data()
    self.assertNotEqual(cache_time, CACHE['get_data']['time'])
    # rebinding the local name (CACHE = {}) would not empty the shared cache
    utils.CACHE.clear()
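# The two cache tests above (test_memoize and test_cache) assume a memoizing
# decorator on utils.get_data that stores results in a module-level CACHE dict
# keyed by function name. The decorator itself is not shown in this section;
# the following is a minimal sketch consistent with the keys the tests check
# ('data' plus 'time'/'expire' timestamps), not the project's actual code.
from functools import wraps
from time import time

CACHE = {}


def memoize(duration=600):
    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            entry = CACHE.get(func.__name__)
            # reuse the cached value until the entry expires
            if entry and time() < entry['expire']:
                return entry['data']
            result = func(*args, **kwargs)
            now = time()
            CACHE[func.__name__] = {'data': result, 'time': now,
                                    'expire': now + duration}
            return result
        return wrapper
    return decorator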
def fine_tuning_step(n_neighbors):
    # NOTE: sklearn's NearestNeighbors is unsupervised and has no predict();
    # for AUC/precision/recall metrics, KNeighborsClassifier is the likely intent.
    model = NearestNeighbors(n_neighbors=n_neighbors)
    X, app_Y = get_data("../data/orange_aft_clean.csv", attr="appetency", is_balance=True)
    app_auc, pre, rec, f1 = train_and_validation(X, app_Y, model)

    model = NearestNeighbors(n_neighbors=n_neighbors)
    X, churn_Y = get_data("../data/orange_aft_clean.csv", attr="churn", is_balance=True)
    churn_auc, pre, rec, f1 = train_and_validation(X, churn_Y, model)

    model = NearestNeighbors(n_neighbors=n_neighbors)
    X, up_Y = get_data("../data/orange_aft_clean.csv", attr="upselling", is_balance=True)
    up_auc, pre, rec, f1 = train_and_validation(X, up_Y, model)

    return app_auc, churn_auc, up_auc
def get_suggestions(**args):
    # initializations
    papers = []
    bibcodes = []
    if 'bibcodes' in args:
        bibcodes = args['bibcodes']
    if len(bibcodes) == 0:
        return []
    # Any overrides for default values?
    Nsuggestions = current_app.config.get('CITATION_HELPER_NUMBER_SUGGESTIONS')
    # get rid of potential trailing spaces
    bibcodes = [b.strip() for b in bibcodes][:current_app.config.get('CITATION_HELPER_MAX_INPUT')]
    # start processing:
    # get the citations for all publications (keeping multiplicity is essential)
    papers = get_data(bibcodes=bibcodes)
    if "Error" in papers:
        return papers
    # remove the original papers from the list to get candidates
    papers = [p for p in papers if p not in bibcodes]
    # establish frequencies of papers in results
    paperFreq = [(k, len(list(g))) for k, g in groupby(sorted(papers))]
    # and sort them, most frequent first
    paperFreq = sorted(paperFreq, key=operator.itemgetter(1), reverse=True)
    # remove all papers with frequencies smaller than the threshold
    paperFreq = [p for p in paperFreq
                 if p[1] > current_app.config.get('CITATION_HELPER_THRESHOLD_FREQUENCY')]
    # get metadata for suggestions
    meta_dict = get_meta_data(results=paperFreq[:Nsuggestions])
    if "Error" in meta_dict:
        return meta_dict
    # return results in the required format
    return [{'bibcode': x, 'score': y,
             'title': meta_dict[x]['title'], 'author': meta_dict[x]['author']}
            for (x, y) in paperFreq[:Nsuggestions] if x in meta_dict]
def ma_nearby_x(code, index=0, x=24):
    """Return True if the latest close of `code` is less than 5% above its x-day moving average."""
    day_data = get_day_data_rv(code)
    if len(day_data) < (x * 2 + index):
        return False
    total = 0.0
    for i in range(index, x + index):
        total += float(day_data[i][3])
    newest_data = day_data[index][1:5]
    point = float(newest_data[2])
    point_x = total / x
    # Shenzhen codes start with '0' or '3'; everything else is Shanghai
    if code[0] == '0' or code[0] == '3':
        _code = 'sz' + code
    else:
        _code = 'sh' + code
    res = utils.get_data(CONFIG.URL_CHECK_STOCK + _code)
    res = json.loads(res[23:len(res) - 4])
    if float(res[0]) == 0:
        return False
    if (point - point_x) / point_x < 0.05:
        up_x_data.append([point_x, point, (point - point_x) / point_x, code])
        return True
    return False
def test_service_results(self):
    '''Test to see if mock methods return expected results'''
    httpretty.register_uri(
        httpretty.GET,
        self.app.config.get('CITATION_HELPER_SOLRQUERY_URL'),
        content_type='application/json',
        status=200,
        body="""{
        "responseHeader":{
        "status":0, "QTime":0,
        "params":{ "fl":"reference,citation", "indent":"true",
        "wt":"json", "q":"*"}},
        "response":{"numFound":10456930,"start":0,"docs":%s
        }}""" % json.dumps(mockdata))
    expected_papers = [u'x', u'z', u'd', u'x', u'e', u'y',
                       u'p', u'p', u'c', u'p', u'y', u'a']
    bibcodes = ['a', 'b', 'c']
    results = get_data(bibcodes=bibcodes)
    self.assertEqual(results, expected_papers)
    expected_meta = {u'a': {'author': u'a_author,+', 'title': u'a_title'},
                     u'c': {'author': u'c_author,+', 'title': u'c_title'},
                     u'b': {'author': u'b_author,+', 'title': u'b_title'},
                     u'p': {'author': u'p_author,+', 'title': u'p_title'},
                     u'y': {'author': u'y_author,+', 'title': u'y_title'},
                     u'x': {'author': u'x_author,+', 'title': u'x_title'},
                     u'z': {'author': u'z_author,+', 'title': u'z_title'}}
    scorelist = [('a', 3), ('b', 2)]
    resmeta = get_meta_data(results=scorelist)
    self.assertEqual(resmeta, expected_meta)
def test_group_by_weekday(self):
    """
    Test grouping presence entries by weekday.
    """
    data = utils.get_data()
    correct_data = [[], [30047], [24465], [23705], [], [], []]
    self.assertEqual(utils.group_by_weekday(data[10]), correct_data)
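# group_by_weekday itself is not defined in this section; the test above implies
# it takes one user's entries (a dict mapping datetime.date to {'start': time,
# 'end': time}, as built by get_data in the CSV-parsing test below) and returns
# seven lists of presence durations in seconds, one per weekday. A minimal
# sketch under that assumption, not the project's actual implementation:
import datetime


def group_by_weekday(items):
    result = [[] for _ in range(7)]  # one bucket per weekday, Monday first
    for date, times in items.items():
        start = datetime.datetime.combine(date, times['start'])
        end = datetime.datetime.combine(date, times['end'])
        # presence duration for that day, in seconds
        result[date.weekday()].append(int((end - start).total_seconds()))
    return result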
def run():
    table = get_data(get_data_path('taxonomy.csv'))
    group_id = species_id = 1
    for index in range(1, len(table)):
        row = table[index]
        kwargs = {
            'id': group_id,
            'order': row[5],
            'family': row[6].split('(')[0].strip(),
            'genus': row[3].split(' ')[0],
        }
        if 'en' in get_languages() and '(' in row[6]:
            kwargs['name_en'] = row[6].split('(')[1][:-1].strip()
        group, created = SpeciesGroup.objects.get_or_create(**kwargs)
        if created:
            group_id += 1
        Species.objects.create(
            id=species_id,
            include=False,
            order=species_id,
            rank=Rank.objects.get(slug=row[1]),
            group=group,
            standard_name=row[4],
            scientific_name=row[3],
        )
        # a Species row is created for every line, so its id must always advance
        species_id += 1
def make_movie(datadir, deltaname, variable, framenum):
    datafile = utils.datafiles[variable]
    delta = utils.get_data(datadir, datafile, deltaname)
    mp = utils.gridEdges(datadir)
    cmap = utils.cmap[variable]
    vmin = np.nanmin(delta["data"])
    vmax = np.nanmax(delta["data"])
    norm = colors.Normalize(vmin=vmin, vmax=vmax)
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    bm = utils.basemap(ax)
    X, Y = bm(mp["lons"], mp["lats"])
    print(ax.get_xlim())
    print(ax.get_ylim())
    ax.axis(utils.mapbounds[deltaname])

    def updatefig(i):
        mp["map"][delta["inds"][0], delta["inds"][1]] = delta["data"].iloc[i, :]
        date = delta["data"].index[i].strftime("%Y-%m-%d")
        im = bm.pcolormesh(X, Y, np.ma.masked_invalid(mp["map"]), cmap=cmap, norm=norm)
        cbar = bm.colorbar(im, "bottom", cmap=cmap, norm=norm)
        ax.set_title("{}: {}".format(utils.fullname[variable], date))

    ani = animation.FuncAnimation(fig, updatefig, frames=framenum)
    ani.save("{}_{}_{}.mp4".format(utils.fullname[variable], deltaname, framenum))
def fine_tuning_step(n_estimators, learning_rate):
    sess = tf.Session(config=tf.ConfigProto(intra_op_parallelism_threads=5,
                                            inter_op_parallelism_threads=5))
    model = NN_Voting(sess, n_estimators=n_estimators, learning_rate=learning_rate)
    X, app_Y = get_data("../data/orange_aft_clean.csv", attr="appetency", is_balance=True)
    app_auc, pre, rec, f1 = train_and_validation(sess, X, app_Y, model)

    model = NN_Voting(sess, n_estimators=n_estimators, learning_rate=learning_rate)
    X, churn_Y = get_data("../data/orange_aft_clean.csv", attr="churn", is_balance=True)
    churn_auc, pre, rec, f1 = train_and_validation(sess, X, churn_Y, model)

    model = NN_Voting(sess, n_estimators=n_estimators, learning_rate=learning_rate)
    X, up_Y = get_data("../data/orange_aft_clean.csv", attr="upselling", is_balance=True)
    up_auc, pre, rec, f1 = train_and_validation(sess, X, up_Y, model)

    return app_auc, churn_auc, up_auc
def fine_tuning_step(n_estimators, learning_rate):
    # Alternative without the SVC base estimator:
    # model = AdaBoostClassifier(n_estimators=n_estimators, learning_rate=learning_rate)
    model = AdaBoostClassifier(base_estimator=SVC(10, probability=True),
                               n_estimators=n_estimators, learning_rate=learning_rate)
    X, app_Y = get_data("../data/orange_aft_clean.csv", attr="appetency", is_balance=True)
    app_auc, pre, rec, f1 = train_and_validation(X, app_Y, model)

    model = AdaBoostClassifier(base_estimator=SVC(10, probability=True),
                               n_estimators=n_estimators, learning_rate=learning_rate)
    X, churn_Y = get_data("../data/orange_aft_clean.csv", attr="churn", is_balance=True)
    churn_auc, pre, rec, f1 = train_and_validation(X, churn_Y, model)

    model = AdaBoostClassifier(base_estimator=SVC(10, probability=True),
                               n_estimators=n_estimators, learning_rate=learning_rate)
    X, up_Y = get_data("../data/orange_aft_clean.csv", attr="upselling", is_balance=True)
    up_auc, pre, rec, f1 = train_and_validation(X, up_Y, model)

    return app_auc, churn_auc, up_auc
def train(fea_file, model_file):
    X_train, Y_train = utils.get_data(fea_file)
    print("load fea file ok")
    logreg = linear_model.LogisticRegression(C=1e5)
    logreg.fit(X_train, Y_train)
    print("fit ok")
    joblib.dump(logreg, model_file)
    print("dump ok")
def _anonymoususer(request, *args, **kwargs):
    user = request.user
    if user.is_authenticated():
        return HttpResponseRedirect(redirect_to=url_reverse('users.views.view_homepage'))
    next = get_data(request).get('next', '')
    if next:
        kwargs['next'] = next
    return the_function(request, *args, **kwargs)
def predict(fea_file, model_file, predict_file):
    # get_data returns (X, Y), matching the order used in train()
    X_test, Y_test = utils.get_data(fea_file)
    log_reg = joblib.load(model_file)
    Y_predict = log_reg.predict(X_test)
    ou_predict = open(predict_file, "w")
    for y in Y_predict:
        ou_predict.write(str(y) + "\n")
    ou_predict.close()
def export_secondaryref_info(request, outfile):
    ids = utils.get_data(request, 'mysql')
    res = []
    data = utils.get_data(
        'SELECT refdesc.id,year,language,keywords FROM refdesc '
        'INNER JOIN refs ON refs.id=refdesc.id WHERE keywords IS NOT NULL;',
        'mysql')
    # index the rows by id
    data_dico = dict()
    for row in data:
        r = list(row)
        data_dico[r[0].encode('utf8')] = [r[i] for i in range(1, len(r))]
    # keep only the rows matching the requested ids
    for i in ids:
        if i[0].encode('utf8') in data_dico:
            res.append(data_dico[i[0].encode('utf8')])
    utils.export_matrix_csv(res, outfile, '\t', False)
def run(*args):
    for filename in args:
        table = get_data(filename)
        for index in range(1, len(table)):
            row = table[index]
            species = Species.objects.get(scientific_name=row[0])
            species.include = True
            setattr(species, table[0][1], row[1])
            species.save()
def users_view():
    """
    Users listing for dropdown.
    """
    data = get_data()
    return [{'user_id': i, 'name': 'User {0}'.format(str(i))}
            for i in data.keys()]
def test(log_file): of = open(log_file, "w") of.write("\t\t\tAUC\t\tPrecise\t\tRecall\t\tF1\n") model = NearestNeighbors(n_neighbors=n_neighbors) X, app_Y = get_data("../data/orange_aft_clean.csv", attr="appetency", is_balance=True) auc, pre, rec, f1 = train_and_validation(X, app_Y, model) of.write("App\t\t%g\t\t%g\t\t%g\t\t%g\n" %(auc, pre, rec, f1)) model = NearestNeighbors(n_neighbors=n_neighbors) X, churn_Y = get_data("../data/orange_aft_clean.csv", attr="churn", is_balance=True) auc, pre, rec, f1 = train_and_validation(X, churn_Y, model) of.write("Churn\t\t%g\t\t%g\t\t%g\t\t%g\n" %(auc, pre, rec, f1)) model = NearestNeighbors(n_neighbors=n_neighbors) X, up_Y = get_data("../data/orange_aft_clean.csv", attr="upselling", is_balance=True) auc, pre, rec, f1 = train_and_validation(X, up_Y, model) of.write("Up\t\t%g\t\t%g\t\t%g\t\t%g\n" %(auc, pre, rec, f1))
def main(): """Driver function.""" from utils import get_data params = {'symbols': ['AAPL', 'FB', 'GOOG'], 'start_date': '01/01/2012', 'end_date': '03/20/2016', 'principle': 1000.00} prices = get_data(params) allocs = optimize_portfolio(prices) print(allocs)
def do_get_companies(hint):
    if not hint:
        return []
    hint = str(hint)
    data = get_data()
    ret = []
    for mem_id in data:
        mail = data[mem_id]["email"]
        domain = mail.split("@")[-1]
        if hint.lower() in domain.lower():
            ret.append(data[mem_id])
    return ret
def test_bootstrap():
    corpus = utils.get_data(
        'SELECT id FROM refdesc WHERE abstract_keywords IS NOT NULL LIMIT 2000;',
        '../../Data/dumps/20160126_cybergeo.sqlite3')
    for kwLimit in [50, 100, 200]:
        for subCorpusSize in [100, 500, 1000, 2000]:
            bootstrapSize = 25
            [relevantkw, relevant_dico, allkw] = bootstrap_subcorpuses(corpus, kwLimit, subCorpusSize, bootstrapSize)
            utils.export_dico_csv(relevant_dico,
                                  'res/conv_dico/bootstrap_relevantDico_kwLimit' + str(kwLimit)
                                  + '_subCorpusSize' + str(subCorpusSize)
                                  + '_bootstrapSize' + str(bootstrapSize), True)
            utils.export_list(relevantkw,
                              'res/conv_kw/kw_' + str(kwLimit) + '_subCorpusSize' + str(subCorpusSize), False)
            utils.export_dico_num_csv(relevantkw,
                                      'res/conv_tm/kw_' + str(kwLimit) + '_subCorpusSize' + str(subCorpusSize), False)
            for i in range(len(allkw)):
                local_kw = allkw[i]
                utils.export_list(local_kw.keys(),
                                  'res/conv_kw/kw_' + str(kwLimit) + '_subCorpusSize' + str(subCorpusSize)
                                  + '_run' + str(i), False)
                utils.export_dico_num_csv(local_kw,
                                          'res/conv_tm/kw_' + str(kwLimit) + '_subCorpusSize' + str(subCorpusSize)
                                          + '_run' + str(i), False)
def do_get_cores(hint):
    if not hint:
        return []
    hint = str(hint)
    data = get_data()
    ret = []
    for mem_id in data:
        username = data[mem_id]["username"]
        fullname = data[mem_id]["fullname"]
        target_str = " ".join([username, fullname])
        if hint.lower() in target_str.lower():
            ret.append(data[mem_id])
    return ret
def extract_relevant_cybergeo(kwLimit, database):
    corpus = utils.get_data(
        "SELECT cybergeo.id FROM refdesc INNER JOIN cybergeo ON cybergeo.id=refdesc.id "
        "WHERE abstract_keywords IS NOT NULL AND abstract_keywords!='';",
        database,
    )
    print(corpus)
    occurence_dicos = utils.import_kw_dico_req(
        "SELECT cybergeo.id,abstract_keywords FROM refdesc INNER JOIN cybergeo ON cybergeo.id=refdesc.id "
        "WHERE abstract_keywords IS NOT NULL AND abstract_keywords!='';",
        database,
    )
    print(occurence_dicos)
    [relevantkw, relevant_dico] = kwFunctions.extract_relevant_keywords(corpus, kwLimit, occurence_dicos)
    utils.export_dico_csv(relevant_dico, "res/cybergeo/relevantDico_kwLimit" + str(kwLimit), False)
    utils.export_dico_num_csv(relevantkw, "res/cybergeo/kw_" + str(kwLimit), False)
def test_get_data(self):
    """
    Test parsing of CSV file.
    """
    data = utils.get_data()
    self.assertIsInstance(data, dict)
    self.assertItemsEqual(data.keys(), [10, 11, 12, 13, 5123])
    sample_date = datetime.date(2013, 9, 10)
    self.assertIn(sample_date, data[10])
    self.assertItemsEqual(data[10][sample_date].keys(), ['start', 'end'])
    self.assertEqual(data[10][sample_date]['start'], datetime.time(9, 39, 5))
def extract_relevant_cybergeo_fulltext(kwLimit):
    resdir = "res/cybergeo_full/"
    corpus = utils.get_data(
        "SELECT id FROM cybergeo WHERE fulltext_keywords IS NOT NULL AND fulltext_keywords!='' LIMIT 10;",
        "mysql",
    )
    occurence_dicos = utils.import_kw_dico_req(
        "SELECT id,fulltext_keywords FROM cybergeo WHERE fulltext_keywords IS NOT NULL AND fulltext_keywords!='' LIMIT 10;",
        "mysql",
    )
    [relevantkw, relevant_dico] = kwFunctions.extract_relevant_keywords(corpus, kwLimit, occurence_dicos)
    # export as csv
    utils.export_dico_csv(relevant_dico, resdir + "relevantDico_kw" + str(kwLimit), False)
    utils.export_dico_num_csv(relevantkw, resdir + "termhoods_kw" + str(kwLimit), False)
def train_model(self):
    # initialize variables
    try:
        tf.global_variables_initializer().run()
    except:
        tf.initialize_all_variables().run()
    # load model if a checkpoint exists
    bool_load = self.load_model()
    if bool_load:
        print('[***] load model successfully')
    else:
        print('[!!!] fail to load model')
    # get mnist dataset
    datasource = get_data(data_type=self.config.dataset, is_training=self.config.is_training)
    data_gen = gen_batch_data(batchsize=self.batchsize, datasource=datasource)
    counter = 0
    for epoch in range(self.config.epoch):
        # save model every 10 epochs
        print('epoch:{}'.format(epoch))
        if np.mod(epoch, 10) == 0:
            self.save_model()
        for ite in tqdm(range(50000 // self.batchsize)):
            input_x, input_labels = next(data_gen)
            noise_z = np.random.uniform(-1, 1, size=[self.batchsize, self.z_dim]).astype(float)
            if ite == 0:
                sample_labels = input_labels
            # optimize discriminator
            _, d_loss, summaries = self.sess.run(
                [self.d_optim, self.d_loss, self.summaries],
                feed_dict={self.z: noise_z,
                           self.input_labels: input_labels,
                           self.input_x: input_x})
            # optimize generator twice per discriminator update
            for _ in range(2):
                _, g_loss = self.sess.run([self.g_optim, self.g_loss],
                                          feed_dict={self.z: noise_z,
                                                     self.input_labels: input_labels})
            # sample images during the training phase
            if np.mod(ite, 100) == 0:
                sample = self.sample.eval({self.z: noise_z, self.input_labels: sample_labels})
                save_image([8, 8],
                           '{}/sample_{:3d}_{:4d}.png'.format(self.config.sample_dir, epoch, ite),
                           sample)
                # save_image([8,8], '{}/input_{:3d}_{:4d}.png'.format(self.config.sample_dir, epoch, ite), input_x)
            # visualize loss in the browser using tensorboard
            counter = counter + 1
            self.summary_writer.add_summary(summaries, global_step=counter)
    '--mode', type=str, required=True, help='either "train" or "test"')
parser.add_argument('-w', '--weights', type=str, help='a trained model weights')
args = parser.parse_args()

maybe_make_dir('weights')
maybe_make_dir('portfolio_val')

timestamp = time.strftime('%Y%m%d%H%M')

data = np.around(get_data())
train_data = data[:, :3526]
test_data = data[:, 3526:]

env = TradingEnv(train_data, args.initial_invest)
state_size = env.observation_space.shape
action_size = env.action_space.n
agent = DQNAgent(state_size, action_size)
scaler = get_scaler(env)

portfolio_value = []

if args.mode == 'test':
    # remake the env with test data
    env = TradingEnv(test_data, args.initial_invest)
    # load trained weights
def bernoulli(head, tail):
    total = head + tail
    return ((head / total) ** head) * ((tail / total) ** tail)


def binomial(head, tail):
    return combinations(head + tail, head) * bernoulli(head, tail)


parser = argparse.ArgumentParser()
parser.add_argument('--filename', type=str, default='testfile.txt')
parser.add_argument('--a', type=int, default=0)
parser.add_argument('--b', type=int, default=0)
args = parser.parse_args()
print(args)

trials = get_data(args.filename)
a, b = args.a, args.b
for case, trial in enumerate(trials, 1):
    print('case %s: %s' % (case, trial))
    head = trial.count('1')
    tail = len(trial) - head
    print('Likelihood: %s' % binomial(head, tail))
    print('Beta prior: a = %s b = %s' % (a, b))
    # conjugate update: the posterior is Beta(a + heads, b + tails)
    a += head
    b += tail
    print('Beta posterior: a = %s b = %s\n' % (a, b))
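# combinations(n, k) is called above but not defined in this snippet; it is
# presumably imported or defined elsewhere in the original file. A minimal
# stand-in that computes the binomial coefficient with exact integer arithmetic:
from math import factorial


def combinations(n, k):
    # number of ways to choose k successes out of n trials
    return factorial(n) // (factorial(k) * factorial(n - k))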
# add the model on top of the convolutional base
model = Model(inputs=model.input, outputs=top_model(model.output))
for layer in model.layers[:19]:
    layer.trainable = False

# Two methods:
# 1. Use sparse_categorical_crossentropy.
# 2. If using categorical_crossentropy, use keras.utils.np_utils.to_categorical
#    in the generator to convert labels.
model.compile(loss='categorical_crossentropy',
              optimizer=optimizers.SGD(lr=1e-4, momentum=0.9),
              metrics=['accuracy'])

train_lines, valid_lines = get_data('data.csv')
ntrain, nvalid = len(train_lines), len(valid_lines)
print("""
Training set: %d images.
Validation set: %d images.
""" % (ntrain, nvalid))

train_generator = generator_from_csv(train_lines,
                                     batch_size=batch_size,
                                     target_size=(img_height, img_width),
                                     train=True)
validation_generator = generator_from_csv(valid_lines,
                                          batch_size=1,
                                          target_size=(img_height, img_width),
def get_home_page():
    return render_template("home.html", data=get_data())
    model.fit(X, y.reshape(-1))

    # Get the per-iteration loss that sklearn printed to stdout
    sys.stdout = old_stdout
    loss_history = mystdout.getvalue()
    loss_list = []
    for line in loss_history.split('\n'):
        if len(line.split("loss: ")) == 1:
            continue
        loss_list.append(float(line.split("loss: ")[-1]))
    return model, loss_list


if __name__ == '__main__':
    config = {EPOCHS: 1000, LEARNING_RATE: 0.05}
    X, y = get_data()

    # Sklearn model
    model, sklearn_losses = sklearn_classifier(X, y, config)

    # Random selection coordinate descent
    print("Random Model")
    config[WEIGHT_SELECTION] = "random"
    rand_model = CoordinateDescentRegression(X.shape[1])
    rand_losses, rand_accs = train(rand_model, X, y, config)

    # config[LEARNING_RATE] = 0.00001
    # Best selection coordinate descent
    print("BEST Model")
    config[WEIGHT_SELECTION] = "best"
    best_model = CoordinateDescentRegression(X.shape[1])
def get_data():
    res = utils.get_data()
    return utils.sort_data(res)
def get_wide_deep_columns_new(data_file):
    df = get_data(data_file)
    print('len(df.columns)', len(df.columns))
    deep_columns = []
    wide_columns = []
    cross_dict = {}
    bucketized_features = ('same_city_look_order_start_distance',
                           'same_city_look_order_price',
                           'same_city_look_order_time_diff')
    for feature_conf_info in feature_conf_list:
        feature_function = feature_conf_info['feature_column_function']
        feature_name = feature_conf_info['name']
        ########################### category columns ############################
        if feature_function == 'categorical_column_with_vocabulary_list':
            voc_list = feature_conf_info.get('voc_list')
            if voc_list is None:
                voc_list = df[feature_name].unique()
            feature = tf.feature_column.categorical_column_with_vocabulary_list(feature_name, voc_list)
            # feature = tf.feature_column.categorical_column_with_identity(feature_name, len(voc_list))
            wide_columns.append(feature)
            deep_columns.append(tf.feature_column.embedding_column(feature, dimension=8))
        elif feature_function == 'categorical_column_with_hash_bucket':
            # String-to-id mapping without maintaining an explicit vocabulary;
            # hash_bucket_size is set to roughly 2-5x the number of categories,
            # so hash collisions are possible.
            hash_bucket_size = feature_conf_info['hash_bucket_size']
            feature = tf.feature_column.categorical_column_with_hash_bucket(
                feature_name, hash_bucket_size=hash_bucket_size)
            embed_dim = embedding_dim(hash_bucket_size)
            wide_columns.append(feature)
            deep_columns.append(tf.feature_column.embedding_column(feature, dimension=embed_dim))
        ########################### dense columns ###############################
        elif feature_function == 'numeric_column':
            deep_columns.append(tf.feature_column.numeric_column(feature_name))
        ########################### bucketized columns ##########################
        elif feature_name in bucketized_features:
            # the three original branches were identical apart from the feature name
            bucket_num = feature_conf_info['bucket_num']
            score_span = (df[feature_name].max() - df[feature_name].min()) // bucket_num + 1
            min_value = df[feature_name].min()
            buckets = tf.feature_column.bucketized_column(
                tf.feature_column.numeric_column(feature_name),
                [(i * score_span + min_value) for i in range(bucket_num)])
            wide_columns.append(buckets)
            deep_columns.append(tf.feature_column.embedding_column(
                buckets, dimension=embedding_dim(bucket_num)))
            cross_dict[feature_name] = buckets
        ########################### cross columns ###############################
        elif feature_function == 'crossed_column':
            # Members of a crossed column must not be defined twice, so the
            # crosses are built here from the bucketized columns collected above.
            crosses = [
                (bucketized_features, 10000),
                (bucketized_features[:2], 1000),
                (bucketized_features[1:], 1000),
                ((bucketized_features[0], bucketized_features[2]), 1000),
            ]
            for names, hash_bucket_size in crosses:
                feature = tf.feature_column.crossed_column(
                    [cross_dict[name] for name in names],
                    hash_bucket_size=hash_bucket_size)
                wide_columns.append(feature)
                deep_columns.append(tf.feature_column.embedding_column(
                    feature, dimension=embedding_dim(hash_bucket_size)))
        else:
            pass
    return wide_columns, deep_columns
def get(self, url, param, retry=3):
    logger.info('Crawl content url: %s, %s', url, str(param))
    if not url.startswith('http'):
        return utils.read_content(url)
    return utils.get_data(url, param, retry)
def get_label(cnt, pct, labelList, n):
    """
    Predict age, gender, zodiac sign and education level:
    return the n interests with the highest scores.
    """
    score = cal_label(cnt.astype('float64'), pct.astype('float64'))
    score = score.tolist()
    n_top = sorted(range(len(score)), key=lambda i: score[i], reverse=True)[:n]
    labels = set(labelList[index] for index in n_top)
    return labels


if __name__ == '__main__':
    name, age, gender, interests, edu, constellation = get_data(conf.aiqiyi_dir)
    genderList = ['男', '女']
    ageList = ['1-17', '18-24', '25-30', '31-35', '36-40', '40+']
    eduList = ['小学', '初中', '高中-中专', '大专', '本科', '硕士以上']
    constellationList = ['白羊座', '金牛座', '双子座', '巨蟹座', '狮子座', '处女座',
                         '天秤座', '天蝎座', '射手座', '摩羯座', '水瓶座', '双鱼座']
    labelList = [(gender, genderList), (age, ageList), (edu, eduList),
                 (constellation, constellationList)]
    name_toidx, idx_toname = name_index(name)
    namelist = ['热血狂篮', '亲爱的活祖宗', '芈月传奇番外篇之邪恶游戏', '热血狂篮', 'others']
    namelists = [namelist, namelist]
    for each in labelList:
for i in range(len(d1)):
    data.append((d1[i], d2[i]))

data = get_data(data)

lengths = []
for i in data:
    lengths.append(len(i[0].phrase.split()))
    lengths.append(len(i[1].phrase.split()))

if params.max > 0:
    random.shuffle(data)
    data = data[0:params.max]

if params.mode == "ppdb":
    d = utils.get_data("../data/ppdb-XL-ordered-data.txt")
    random.shuffle(d)
    # total token count of the current sample
    ct = 0
    for i in data:
        ct += len(i[0].phrase.split()) + len(i[1].phrase.split())
    data = []
    print(ct)
    idx = 0
    ct2 = 0
    # draw PPDB pairs until they cover the same number of tokens
    while ct > 0:
        dd = d[idx]
        data.append(dd)
        v = len(dd[0].phrase.split()) + len(dd[1].phrase.split())
        ct -= v
        ct2 += v
        idx += 1
from utils import get_data


def solve(data, limit):
    last = {v: i for i, v in enumerate(data, 1)}
    prev = data[-1]
    for i in range(len(data) + 1, limit + 1):
        new = i - 1 - last[prev] if prev in last else 0
        last[prev] = i - 1
        prev = new
    return prev


if __name__ == '__main__':
    data = get_data("15")
    data = list(map(int, data.split(',')))
    print(solve(data, 2020))
    print(solve(data, 30000000))
# coding=utf-8
import os
import difflib
import tensorflow as tf
import numpy as np
from utils import decode_ctc, GetEditDistance

# 0. Prepare the vocabularies needed for decoding. The parameters must match
#    those used in training; the vocabularies could also be saved to disk and
#    read back directly.
from utils import get_data, data_hparams

data_args = data_hparams()
train_data = get_data(data_args)

# 1. Acoustic model -----------------------------------
# from model_speech.cnn_ctc import Am, am_hparams
#
# am_args = am_hparams()
# am_args.vocab_size = len(train_data.am_vocab)
# am = Am(am_args)
# print('loading acoustic model...')
# am.ctc_model.load_weights('logs_am/model.h5')

# 2. Language model -------------------------------------------
from model_language.transformer import Lm, lm_hparams

lm_args = lm_hparams()
lm_args.input_vocab_size = len(train_data.pny_vocab)
lm_args.label_vocab_size = len(train_data.han_vocab)
lm_args.dropout_rate = 0.
print('loading language model...')
lm = Lm(lm_args)
sess = tf.Session(graph=lm.graph)
import sys

from utils import get_data, output

if __name__ == "__main__":
    # 1. prepare data and define epochs
    epochs = 10
    optimizer = "adam"
    dropout = 0.5
    data_type = 'raw'
    if len(sys.argv) == 3:
        data_type = sys.argv[1]
        epochs = int(sys.argv[2])
    data, train, test = get_data(data_type)

    # 2. count unique values of each sparse field and record the dense feature field names
    fixlen_feature_columns = [SparseFeat(feat, vocabulary_size=data[feat].nunique(), embedding_dim=4)
                              for feat in sparse_features] \
                             + [DenseFeat(feat, 1) for feat in dense_features]
    dnn_feature_columns = fixlen_feature_columns
    linear_feature_columns = fixlen_feature_columns
    feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns)

    # 3. generate input data for the model
    train_model_input = {name: train[name] for name in feature_names}
    test_model_input = {name: test[name] for name in feature_names}
73: "CDLTAKURI", 74: "CDLTASUKIGAP", 75: "CDLTHRUSTING", 76: "CDLTRISTAR", 77: "CDLUNIQUE3RIVER", 78: "CDLUPSIDEGAP2CROWS", 79: "CDLXSIDEGAP3METHODS", 80: 'ema_12', 81: 'ema_26', 82: 'upper_list', 83: 'lower_list', 84: 'K_value', 85: 'D_value', 86: 'label' } df = utils.get_data(MYDB, SQL, NAME_DICT) if isinstance(df, int) == False and len(df) != 0: df = utils.get_data(MYDB, SQL, NAME_DICT) param_grid = { 'n_estimators': [100, 200, 300, 400, 500, 700, 800, 1000], 'max_features': ['auto', 'sqrt', 'log2'], 'max_depth': [4, 5, 6, 7, 8, 10, 15, 20, 50, 100], 'criterion': ['gini', 'entropy'] } scoring = ['precision_macro', 'recall_macro'] feature_cols = [ 'volume', 'numberOfTrades', 'var_ema', 'var_bollinger', 'var_stoch', 'rsi_indicator', 'stoch_indicator', 'RSI', 'ema_indicator', 'bollinger_indicator', 'CDL2CROWS', 'CDL3BLACKCROWS', 'CDL3INSIDE', 'CDL3LINESTRIKE', 'CDL3OUTSIDE',
workdir = '/home/miguel/Documents/tese/ViscoPlastic-ML/2D/butterfly/'
abaqus_dir = '/home/miguel/Documents/tese/ViscoPlastic-ML/abaqus_deploy/butterfly/'
workvid = '/home/miguel/Documents/tese/ViscoPlastic-ML/2D/butterfly/videos/'

P = ['P_1', 'P_2', 'P_3']
el_num = [75, 322, 229]

for n in range(0, 3):
    data_dir = []
    data_dir.append(abaqus_dir + 'state_butterfly_hist.txt')
    data_dir.append(workdir + '/results/' + P[n] + '/data_' + P[n] + '.csv')
    data_dir.append(abaqus_dir + 'deriv_butterfly_hist.txt')
    abaqus, chaboche = get_data(data_dir)
    chaboche = chaboche.drop(chaboche.index[len(chaboche) - 1])
    abaqus = rename_headers(abaqus)
    # Select info about el_num and int_point from the abaqus dataframe
    fea = get_dataframe(abaqus, el_num[n], 2)

    # remove repeated values
    # Plot and save graphs of back stress
    title = 'Back Stress'
    Writer = animation.writers['ffmpeg']
    writer = Writer(fps=2, metadata=dict(artist='Me'), bitrate=-1)
    fig = plt.figure(figsize=(10, 6))
    plt.xlim(0, 0.05)
    plt.ylim(0, 75)
    ani = matplotlib.animation.FuncAnimation(fig,
print("ENTER THE EXPERIMENT NUMBER: ") print("1. Experiment 1") print("2. Experiment 2") print("3. Experiment 3") print("4. Experiment 4") print("5. ANN with layers 1024-512-256-32-1 and sigmoid") print("6. ANN with layers 1024-512-256-32-1 and sigmoid with Adam") print("7. CNN with tanh activation") print("8. CNN with tanh activation and Adam") print("9. ANN with original layers and relu as activation") choice = int(input()) if (choice == 1): X, Y = utils.get_data('steering', 0) num_examples = X.shape[0] split = int(num_examples * 0.8) X_train, Y_train, means, stds = prepareTrainData(X[:split], Y[:split]) X_test, Y_test = X[split:], Y[split:] X_val, Y_val = prepareTestData(X_test, Y_test, means, stds) experiment_one(X_train, Y_train, X_val, Y_val) elif (choice == 2): X, Y = utils.get_data('steering', 0) num_examples = X.shape[0] split = int(num_examples * 0.8) X_train, Y_train, means, stds = prepareTrainData(X[:split], Y[:split]) X_test, Y_test = X[split:], Y[split:] X_val, Y_val = prepareTestData(X_test, Y_test, means, stds) experiment_two_32(X_train, Y_train, X_val, Y_val)
from utils import get_data

data = list(map(int, get_data().split()))


def fuel_req(mass):
    return max((mass // 3) - 2, 0)


print("Fuel requirement without fuel: ", sum([fuel_req(m) for m in data]))

tot_fuel = 0
for m in data:
    fuel = fuel_req(m)
    tot_fuel += fuel
    while fuel > 0:
        fuel = fuel_req(fuel)
        tot_fuel += fuel

print("Fuel requirement with fuel: ", tot_fuel)
elif instruction == "^": santa.y += 1 elif instruction == "v": santa.y -= 1 visited_locations.add((santa.x, santa.y)) elif i % 2 == 1: if instruction == ">": robo_santa.x += 1 elif instruction == "<": robo_santa.x -= 1 elif instruction == "^": robo_santa.y += 1 elif instruction == "v": robo_santa.y -= 1 visited_locations.add((robo_santa.x, robo_santa.y)) return len(visited_locations) if __name__ == '__main__': data = get_data("03") p1_result = part_one(data) print(p1_result) p2_result = part_two(data) print(p2_result)
# import matplotlib.pyplot as plt

# 0. Prepare the training data ------------------------------
data_args = data_hparams()
data_args.data_type = 'train'
data_args.data_path = '../../dataset/'
# data_args.data_path = '/mnt/data/wanli/dataset/data_thchs30/train/'
data_args.thchs30 = True
data_args.aishell = False
data_args.prime = False
data_args.stcmd = False
data_args.batch_size = 4
# data_args.data_length = 10
data_args.data_length = None
data_args.shuffle = True
train_data = get_data(data_args)

# 0. Prepare the validation data ------------------------------
data_args = data_hparams()
data_args.data_type = 'dev'
data_args.data_path = '../../dataset/'
# data_args.data_path = '/mnt/data/wanli/dataset/data_thchs30/test/'
data_args.thchs30 = True
data_args.aishell = False
data_args.prime = False
data_args.stcmd = False
data_args.batch_size = 4
data_args.data_length = None
# data_args.data_length = 10
data_args.shuffle = True
dev_data = get_data(data_args)
        print('Epoch %05d: early stopping' % (self.stopped_epoch + 1))

    def get_monitor_value(self, logs):
        logs = logs or {}
        monitor_value = logs.get(self.monitor)
        return monitor_value


# -------------------------------------------------------------------------------------------------------------------
# Functions and classes end, training script start -------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------------
model = getattr(models, args.model)  # get model_fn

# get datasets for target / source models
target_train_set, target_test_set, source_train_set, source_test_set, input_dim, n_classes = utils.get_data(
    args.dataset, args.ndata)

regularization = "none"
if args.defense in ['l1', 'l2', 'dropout']:
    regularization = args.regularization
elif args.defense in ['', 'dp', 'advreg', 'fine-tune', 'whole']:  # last two are transfer learning
    # these train the model differently, which we will do at training time below
    regularization = 'none'
else:
    raise ValueError(f"Defense: {args.defense} not valid")

# define and compile model
target_model = model(input_dim, args.model_depth, regularization, args.reg_constant, n_classes)
source_model = model(input_dim, args.model_depth, regularization, args.reg_constant, n_classes)
t_optim = tf.keras.optimizers.Adam()
    res = sys_score / ideal_score
    return res


def save_ass(a):
    with open('cluster_assignment', 'wb') as f:
        pickle.dump(a, f)


def load_ass(f_name):
    with open(f_name, 'rb') as f:
        model = pickle.load(f)
    return model


docs, q_docs, mod = utils.get_data(mode='test', t_='lda', n_=64)
print('Loading cluster')
with open('km_80', 'rb') as f:
    model = pickle.load(f)
# a = assign_cluster(docs, model, mod)
# print('Saving assignments')
# save_ass(a)
a = load_ass('cluster_assignment')
avg = 0
n = len(q_docs)
print(n)
for q_id in q_docs:
    q = q_docs[q_id]
    rankings = get_top(q, docs, mod, model, a)
    val = get_ndcg(rankings, 10)
    print(str(q_id), str(val))
def main():
    logger.info("Logger is set - training start")

    # set default gpu device id
    torch.cuda.set_device(config.gpus[0])

    # set seed
    np.random.seed(config.seed)
    torch.manual_seed(config.seed)
    torch.cuda.manual_seed_all(config.seed)
    torch.backends.cudnn.benchmark = True

    # get data with meta info
    input_size, input_channels, n_classes, train_data, valid_data = utils.get_data(
        config.dataset, config.data_path, config.cutout_length,
        validation=True, autoaugment=config.autoaugment)

    if config.label_smooth != 0:
        criterion = utils.CrossEntropyLabelSmooth(10, config.label_smooth).to(device)
    else:
        criterion = nn.CrossEntropyLoss().to(device)

    use_aux = config.aux_weight > 0.
    if config.dataset in utils.LARGE_DATASETS:
        model = AugmentCNNImageNet(input_size, input_channels, config.init_channels,
                                   n_classes, config.layers, use_aux, config.genotype)
    else:
        model = AugmentCNN(input_size, input_channels, config.init_channels,
                           n_classes, config.layers, use_aux, config.genotype,
                           SSC=config.SSC)
    model = nn.DataParallel(model, device_ids=config.gpus).to(device)

    # model size
    mb_params = utils.param_size(model)
    logger.info("Model size = {:.3f} MB".format(mb_params))

    # weights optimizer
    if config.p != 1:
        optimizer = torch.optim.SGD(model.parameters(), 1.,
                                    momentum=config.momentum,
                                    weight_decay=config.weight_decay)
    else:
        optimizer = torch.optim.SGD(model.parameters(), config.lr,
                                    momentum=config.momentum,
                                    weight_decay=config.weight_decay)

    train_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=config.batch_size,
                                               shuffle=True,
                                               num_workers=config.workers,
                                               pin_memory=True)
    valid_loader = torch.utils.data.DataLoader(valid_data,
                                               batch_size=config.batch_size,
                                               shuffle=False,
                                               num_workers=config.workers,
                                               pin_memory=True)

    if config.p == 1:
        lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, config.epochs)
    else:
        lr_cpa = utils.cosine_power_annealing_lr(nepochs=config.epochs,
                                                 min_lr=config.lr_min,
                                                 max_lr=config.lr,
                                                 p=config.p)
        lr_scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, [lr_cpa])

    best_top1 = 0.
    # training loop
    for epoch in range(config.epochs):
        lr_scheduler.step()
        drop_prob = config.drop_path_prob * epoch / config.epochs
        model.module.drop_path_prob(drop_prob)

        # training
        train(train_loader, model, optimizer, criterion, epoch)

        # validation
        cur_step = (epoch + 1) * len(train_loader)
        top1 = validate(valid_loader, model, criterion, epoch, cur_step)

        # save
        if best_top1 < top1:
            best_top1 = top1
            is_best = True
        else:
            is_best = False
        utils.save_checkpoint(model, config.path, is_best)
        print("")

    logger.info("Final best Prec@1 = {:.4%} for job {}".format(best_top1, config.name))
import numpy as np
import torch
from torch import optim
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler

from utils import get_data, train, predict

sc = MinMaxScaler()
seq_length = 4
alpha_array = np.array([0.9])
Xtrain, Xtest, Ytrain, Ytest, dataX, dataY = get_data(alpha_array,
                                                      series_length=1440,
                                                      seq_length=seq_length,
                                                      sc=sc)

# The model is a plain feed-forward sequence of layers
model = torch.nn.Sequential()
model.add_module('dense1', torch.nn.Linear(seq_length, 8))
model.add_module('sigmoid1', torch.nn.Sigmoid())  # sigmoid activation
model.add_module('dense2', torch.nn.Linear(8, 1))

loss = torch.nn.MSELoss(reduction='mean')
optimizer = optim.SGD(model.parameters(), lr=0.0611, momentum=0.9)

epochs = 200  # increase the number of epochs for better accuracy
batch_size = 32
n_batches = Xtrain.size()[0] // batch_size
costs = []
        top_k_corrs = [pair for pair in corrs[1:]
                       if pair[0] in items_ranked_by_user and pair[1] > 0][:self.k]
        # biases for the items ranked by user u
        B_ujs = [self.global_bias + self.user_biases[user] + self.item_biases[pair[0]]
                 for pair in top_k_corrs]
        # correlations of the k nearest neighbors of item i that are rated by user u
        N_uks = [pair[1] for pair in top_k_corrs]
        # ratings of these items
        R_ujs = [csr[user, pair[0]] for pair in top_k_corrs]
        B_ujs = np.array(B_ujs)
        N_uks = np.array(N_uks)
        R_ujs = np.array(R_ujs)
        pred = (np.dot(N_uks, (R_ujs.T - B_ujs)) / N_uks.sum()) + b_ui
        return pred


if __name__ == '__main__':
    baseline_knn_config = Config(k=10)
    train, validation = get_data()
    knn_baseline = KnnBaseline(baseline_knn_config)
    knn_baseline.fit(train)
    print(knn_baseline.calculate_rmse(validation))
from sklearn.externals import joblib
from sklearn import preprocessing
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import train_test_split, validation_curve
from matplotlib import pyplot as plt
import pickle
import sys
import numpy as np
from sklearn.utils import shuffle
from utils import get_data

workdir = '/home/miguel/Documents/tese/ViscoPlastic-ML/2D/train/'

# Load dataset
X, y = get_data(workdir, 'training_data')

scaler_x = preprocessing.StandardScaler()
scaler_y = preprocessing.StandardScaler()

# Fit the scalers to transform features and targets
scaler_x.fit(X)
scaler_y.fit(y)

# Split dataset into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.3, random_state=42)

# Transform data for training and testing
X = scaler_x.transform(X)
def get_item_by_title(item_title):
    for item in get_data():
        if item["title"] == item_title:
            return item
import utils as utl
import pso

data = utl.get_data(file_path="assets/sprint7ToroideMixto.csv")
# use a distinct name so the instance does not shadow the pso module
clustering = pso.ClusteringPSO(data, 50, max_iter=100)
print(clustering.search())
from utils import get_data, answers, print_answers

data = get_data(2020, 11).split('\n')


def surrounding_seats(data, row, seat):
    full = 0
    if (row + 1) < len(data):
        if data[row + 1][seat] == "#":
            full += 1
        if (seat + 1) < len(data[row]):
            if data[row + 1][seat + 1] == "#":
                full += 1
        if (seat - 1) >= 0:
            if data[row + 1][seat - 1] == "#":
                full += 1
    if (row - 1) >= 0:
        if data[row - 1][seat] == "#":
            full += 1
        if (seat + 1) < len(data[row]):
            if data[row - 1][seat + 1] == "#":
                full += 1
        if (seat - 1) >= 0:
            if data[row - 1][seat - 1] == "#":
                full += 1
    if (seat + 1) < len(data[row]):
        if data[row][seat + 1] == "#":
            full += 1
    if (seat - 1) >= 0:
        if data[row][seat - 1] == "#":
            full += 1
import utils
import loss

if __name__ == '__main__':
    # Make directory to save plots
    path = os.path.join(
        os.getcwd(), 'plots',
        args.loss + ("_top_k" if args.topk else "")
        + ("_sn" if args.spectral_norm else "")
        + ("_clip" if args.clip_weights else ""))
    os.makedirs(path, exist_ok=True)
    # Init hyperparameters
    fixed_generator_noise: torch.Tensor = torch.randn(
        [args.samples // 10, args.latent_size], device=args.device)
    # Get data
    data: torch.Tensor = utils.get_data(samples=args.samples).to(args.device)
    # Get generator
    generator: nn.Module = utils.get_generator(latent_size=args.latent_size)
    # Get discriminator
    discriminator: nn.Module = utils.get_discriminator(
        use_spectral_norm=args.spectral_norm)
    # Init Loss function
    if args.loss == 'standard':
        loss_generator: nn.Module = loss.GANLossGenerator()
        loss_discriminator: nn.Module = loss.GANLossDiscriminator()
    elif args.loss == 'non-saturating':
        loss_generator: nn.Module = loss.NSGANLossGenerator()
        loss_discriminator: nn.Module = loss.NSGANLossDiscriminator()
    elif args.loss == 'hinge':
        loss_generator: nn.Module = loss.HingeGANLossGenerator()
        loss_discriminator: nn.Module = loss.HingeGANLossDiscriminator()