def _handleInput(self, paramInput):
    """
      Function to handle the common parts of the model parameter input.
      @ In, paramInput, InputData.ParameterInput, the already parsed input.
      @ Out, None
    """
    Model._handleInput(self, paramInput)
def buildModels(t: str):
    global AnkiModels
    y = json.loads(t)
    templates = []
    flds = []
    with IncrementalBar("\tBuilding Models", max=len(y.keys())) as bar:
        for k in y.keys():
            AnkiModels[str(y[k]["id"])] = Model(str(y[k]["id"]),
                                                y[k]["type"],
                                                cssutils.parseString(y[k]["css"]),
                                                y[k]["latexPre"],
                                                y[k]["latexPost"])
            for fld in y[k]["flds"]:
                flds.append((fld["name"], fld["ord"]))
            flds.sort(key=lambda x: int(x[1]))
            AnkiModels[str(y[k]["id"])].flds = tuple([f[0] for f in flds])
            for tmpl in y[k]["tmpls"]:
                templates.append(Template(tmpl["name"], tmpl["qfmt"], tmpl["did"],
                                          tmpl["bafmt"], tmpl["afmt"], tmpl["ord"],
                                          tmpl["bqfmt"]))
            AnkiModels[str(y[k]["id"])].tmpls = tuple(templates)
            templates = []
            flds = []
            bar.next()
        bar.finish()
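# A minimal usage sketch for buildModels(), assuming the Model and Template
# classes used above are importable from this module. The payload below is
# illustrative only: it carries just the keys the parser actually reads
# (id, type, css, latexPre, latexPost, flds, tmpls).
import json

example_models = json.dumps({
    "1000": {
        "id": 1000,
        "type": 0,
        "css": ".card { font-family: arial; }",
        "latexPre": "\\documentclass{article}\n\\begin{document}",
        "latexPost": "\\end{document}",
        "flds": [{"name": "Front", "ord": 0}, {"name": "Back", "ord": 1}],
        "tmpls": [{"name": "Card 1", "qfmt": "{{Front}}", "did": None,
                   "bafmt": "", "afmt": "{{Back}}", "ord": 0, "bqfmt": ""}],
    }
})
buildModels(example_models)
print(AnkiModels["1000"].flds)  # -> ('Front', 'Back')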
class TestModel(unittest.TestCase):

    def setUp(self):
        self.model = Model()

    def test_produce_initial_max_likelihood_estimates_invalid_check_ins(self):
        # two of the check-ins below share the ID '12'
        check_ins_invalid = [
            {'venue_id': '41059b00f964a520850b1fe3', 'latitude': 37.6164,
             'check_in_message': 'empty_message', 'check_in_id': '12',
             'longitude': -122.386,
             'date': datetime.datetime(2012, 7, 18, 14, 43, 38)},
            {'venue_id': '41059b00f964a520850b1fe3', 'latitude': 50.6164,
             'check_in_message': 'empty_message', 'check_in_id': '14',
             'longitude': 122.386,
             'date': datetime.datetime(2012, 7, 18, 14, 43, 38)},
            {'venue_id': '41059b00f964a520850b1fe3', 'latitude': 51,
             'check_in_message': 'empty_message', 'check_in_id': '12',
             'longitude': 120.386,
             'date': datetime.datetime(2012, 7, 18, 14, 43, 38)},
            {'venue_id': '41059b00f964a520850b1fe3', 'latitude': 35,
             'check_in_message': 'empty_message', 'check_in_id': '13',
             'longitude': -120.386,
             'date': datetime.datetime(2012, 7, 18, 14, 43, 38)},
        ]
        # all check-in IDs below are unique
        check_ins_valid = [
            {'venue_id': '41059b00f964a520850b1fe3', 'latitude': 37.6164,
             'check_in_message': 'empty_message', 'check_in_id': '12',
             'longitude': -122.386,
             'date': datetime.datetime(2012, 7, 18, 14, 43, 38)},
            {'venue_id': '41059b00f964a520850b1fe3', 'latitude': 50.6164,
             'check_in_message': 'empty_message', 'check_in_id': '14',
             'longitude': 122.386,
             'date': datetime.datetime(2012, 7, 18, 14, 43, 38)},
            {'venue_id': '41059b00f964a520850b1fe3', 'latitude': 51,
             'check_in_message': 'empty_message', 'check_in_id': '15',
             'longitude': 120.386,
             'date': datetime.datetime(2012, 7, 18, 14, 43, 38)},
            {'venue_id': '41059b00f964a520850b1fe3', 'latitude': 35,
             'check_in_message': 'empty_message', 'check_in_id': '13',
             'longitude': -120.386,
             'date': datetime.datetime(2012, 7, 18, 14, 43, 38)},
        ]
        with self.assertRaises(ValueError) as cm:
            self.model.check_max_likelihood_estimates_input(check_ins_invalid,
                                                            check_ins_valid)
        self.assertEqual(cm.exception.message,
                         "Error: some check-ins have same IDs!")

    def test_produce_initial_check_in_assignment_invalid_check_ins(self):
        # two of the check-ins below share the ID '12'
        check_ins_invalid = [
            {'venue_id': '41059b00f964a520850b1fe3', 'latitude': 37.6164,
             'check_in_message': 'empty_message', 'check_in_id': '12',
             'longitude': -122.386,
             'date': datetime.datetime(2012, 7, 18, 14, 43, 38)},
            {'venue_id': '41059b00f964a520850b1fe3', 'latitude': 50.6164,
             'check_in_message': 'empty_message', 'check_in_id': '14',
             'longitude': 122.386,
             'date': datetime.datetime(2012, 7, 18, 14, 43, 38)},
            {'venue_id': '41059b00f964a520850b1fe3', 'latitude': 51,
             'check_in_message': 'empty_message', 'check_in_id': '12',
             'longitude': 120.386,
             'date': datetime.datetime(2012, 7, 18, 14, 43, 38)},
            {'venue_id': '41059b00f964a520850b1fe3', 'latitude': 35,
             'check_in_message': 'empty_message', 'check_in_id': '13',
             'longitude': -120.386,
             'date': datetime.datetime(2012, 7, 18, 14, 43, 38)},
        ]
        with self.assertRaises(ValueError) as cm:
            self.model.produce_initial_check_in_assignment(check_ins_invalid)
        self.assertEqual(cm.exception.message,
                         "Error: some check-ins have same IDs!")
def learner(cls, source_conf, target_conf):
    """
    Run Waterloo's transfer learner

    :param source_conf: source dataset
    :type source_conf: str or pd.core.frame.DataFrame
    :param target_conf: target dataset
    :type target_conf: str or pd.core.frame.DataFrame
    :rtype: float
    """
    source_conf = pd.read_csv(source_conf).reset_index() if isinstance(
        source_conf, str) else source_conf.reset_index()
    target_conf = pd.read_csv(target_conf).reset_index() if isinstance(
        target_conf, str) else target_conf.reset_index()

    # Construct a prediction model using the source
    predict_model = Model.train_prediction_model(source_conf)

    # Sample rows common to the source and target to train a transfer model
    # (pick 5 random rows; indices must exist in both datasets)
    if len(source_conf) <= len(target_conf):
        p_src = source_conf[source_conf.columns[-1]].sample(5)
        p_tgt = target_conf[target_conf.columns[-1]].iloc[p_src.index]
    else:
        p_tgt = target_conf[target_conf.columns[-1]].sample(5)
        p_src = source_conf[source_conf.columns[-1]].iloc[p_tgt.index]

    # Train a transfer model
    transfer_model = Model.train_transfer_model(p_src=p_src, p_tgt=p_tgt)

    # Remove the rows used to train the transfer model from the target
    target_conf = target_conf.drop(p_tgt.index, errors="ignore")

    # Perform transfer
    target_indep = target_conf[target_conf.columns[:-1]]
    target_actual = target_conf[target_conf.columns[-1]]
    predicted_raw = predict_model.predict(target_indep).reshape(-1, 1)
    target_predicted = transfer_model.predict(predicted_raw).reshape(1, -1)[0]

    # "Get MMRE"
    # mmre = np.mean(abs(target_actual - target_predicted))
    # return mmre

    # "Get rank difference"
    # r_diff = rank_diff(actual=target_actual, predicted=target_predicted)
    # return r_diff

    # Get magnitude of error
    me = mag_abs_error(actual=target_actual, predicted=target_predicted)
    return me
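# A hedged call sketch for the learner above, assuming CSV files whose last
# column holds the measured performance objective (as the column slicing
# implies); the file names are placeholders.
#
#   error = Model.learner("source_system.csv", "target_system.csv")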
def learner(cls, source_conf, target_conf):
    """
    Run a baseline transfer learner

    :param source_conf: source dataset
    :type source_conf: str or pd.core.frame.DataFrame
    :param target_conf: target dataset
    :type target_conf: str or pd.core.frame.DataFrame
    :rtype: float
    """
    source_conf = pd.read_csv(source_conf).sample(frac=0.2).reset_index() \
        if isinstance(source_conf, str) \
        else source_conf.sample(frac=0.2).reset_index()
    target_conf = pd.read_csv(target_conf).reset_index() if isinstance(
        target_conf, str) else target_conf.reset_index()

    # Size of the training configurations
    n_rows_src = len(source_conf)

    # Construct a prediction model using the source
    predict_model = Model.train_baseline_model(source_conf)

    # Rows common to the source and target
    if len(source_conf) <= len(target_conf):
        p_src = source_conf[source_conf.columns[-1]]
        p_tgt = target_conf[target_conf.columns[-1]].iloc[p_src.index]
    else:
        p_tgt = target_conf[target_conf.columns[-1]]
        p_src = source_conf[source_conf.columns[-1]].iloc[p_tgt.index]

    # Train a transfer model
    transfer_model = Model.train_transfer_model(p_src=p_src, p_tgt=p_tgt)

    # Perform transfer
    target_indep = target_conf[target_conf.columns[:-1]]
    target_actual = target_conf[target_conf.columns[-1]]
    predicted_raw = predict_model.predict(target_indep).reshape(-1, 1)
    target_predicted = transfer_model.predict(predicted_raw).reshape(1, -1)[0]

    # "Get MMRE"
    # mmre = np.mean(abs(target_actual - target_predicted))
    # return mmre

    # "Get rank difference"
    # r_diff = rank_diff(actual=target_actual, predicted=target_predicted)
    # return r_diff

    # Get magnitude of error
    me = mag_abs_error(actual=target_actual, predicted=target_predicted)
    return me
def submit(self, myInput, samplerType, jobHandler, **kwargs):
    """
      This will submit an individual sample to be evaluated by this model to a
      specified jobHandler. Note, some parameters are needed by createNewInput
      and thus descriptions are copied from there.
      @ In, myInput, list, the inputs (list) to start from to generate the new one
      @ In, samplerType, string, is the type of sampler that is calling to generate a new input
      @ In, jobHandler, JobHandler instance, the global job handler instance
      @ In, **kwargs, dict, is a dictionary that contains the information coming from the sampler,
           a mandatory key is 'sampledVars' that contains a dictionary {'name variable':value}
      @ Out, None
    """
    kwargs['forceThreads'] = True
    Model.submit(self, myInput, samplerType, jobHandler, **kwargs)
def _withdraw_funds(amount, person):
    """ Withdraws funds from person's account

    Args:
        amount: amount of money to withdraw
        person: person that wants to withdraw money
    """
    Fund_Service.remove_funds(amount, person)
    withdrawal = Model.BankWithdrawals(person_id=person.id, amount=amount)
    Model.db.session.add(withdrawal)
    Model.db.session.commit()
def _deposit_funds(amount, person):
    """ Deposits funds into person's account

    Args:
        amount: amount of money to deposit
        person: person that wants to deposit money
    """
    Fund_Service.add_funds(amount, person)
    deposit = Model.BankDeposits(person_id=person.id, amount=amount)
    print(deposit)  # debug output
    Model.db.session.add(deposit)
    Model.db.session.commit()
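# A hedged usage sketch for the two helpers above, assuming a Flask-SQLAlchemy
# style app context with Model.db initialised; the person lookup is
# hypothetical and should be replaced with the project's real query.
#
#   person = get_person(person_id=1)   # hypothetical lookup helper
#   _deposit_funds(100.0, person)      # records a Model.BankDeposits row
#   _withdraw_funds(40.0, person)      # records a Model.BankWithdrawals row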
def learner(cls, source_conf, target_conf):
    """
    Run Pooyan's SEAMS transfer learner

    :param source_conf: source dataset
    :type source_conf: str or pd.core.frame.DataFrame
    :param target_conf: target dataset
    :type target_conf: str or pd.core.frame.DataFrame
    :rtype: float
    """
    source_conf = pd.read_csv(source_conf).sample(frac=0.2).reset_index() \
        if isinstance(source_conf, str) \
        else source_conf.sample(frac=0.2).reset_index()
    target_conf = pd.read_csv(target_conf).reset_index() if isinstance(
        target_conf, str) else target_conf.reset_index()

    # Find a random 5% sample of the target
    n_rows_src = len(source_conf)
    n_rows_tgt = len(target_conf)
    sampled_tgt = target_conf.sample(n=int(n_rows_tgt * 0.05))

    # Remove sampled rows from the original target dataset
    target_conf.drop(sampled_tgt.index, errors="ignore", inplace=True)

    # Add the sampled target rows to the source
    sampled = pd.DataFrame(np.concatenate((source_conf.values,
                                           sampled_tgt.values), axis=0),
                           columns=source_conf.columns)

    # Construct a Gaussian process model using the source
    predict_model = Model.train_gaussproc_model(sampled, target_conf)

    # Perform transfer
    target_indep = target_conf[target_conf.columns[:-1]]
    target_actual = target_conf[target_conf.columns[-1]]
    try:
        target_predicted = predict_model.predict(target_indep).reshape(-1, 1)
    except Exception:
        set_trace()  # drop into the debugger on prediction failure

    # "Get MMRE"
    # mmre = np.mean(abs(target_actual - target_predicted))
    # return mmre

    # "Get rank difference"
    # r_diff = rank_diff(actual=target_actual, predicted=target_predicted)
    # return r_diff

    # Get magnitude of error
    me = mag_abs_error(actual=target_actual, predicted=target_predicted)
    return me
def Main():
    parser = argparse.ArgumentParser(
        description='A battleship game built with Python. You can play versus the '
                    'computer or against someone else. Call the program with the '
                    'arguments described below to enable those features. The colors '
                    'of shots while playing represent the answer of the enemy. '
                    'Once the game has ended, the console will output the result.')
    parser.add_argument(
        '-n',
        dest='network_player_name',
        action='store',
        default=False,
        help='Play against someone specific on the network "-n friendName", '
             'versus someone random on the server "-n None", '
             'or play against your computer without adding "-n". '
             'If you play on the network and do not find someone to play '
             'against within about 3 seconds, the game might freeze; '
             'just restart it to try again.')
    parser.add_argument(
        '-c',
        dest='is_local_player_comp',
        action='store_true',
        default=False,
        help='Add this argument to play as the computer against your opponent.')
    parser.add_argument(
        '-s',
        dest='is_display_disabled',
        action='store_true',
        default=False,
        help='Add this argument to disable display. Can only be set if '
             'you play automatically as the computer.')
    requiredNamed = parser.add_argument_group('required named arguments')
    requiredNamed.add_argument('-u',
                               dest='username',
                               help='Set your username: "******".',
                               required=True)
    args = parser.parse_args()

    if args.is_display_disabled and not args.is_local_player_comp:
        # Do not raise, as we do not want a stack trace.
        print('ERROR: Must play as computer to disable display. '
              'Add the "-c" argument.')
        return

    m = Model(args.username, not args.is_display_disabled,
              args.is_local_player_comp, args.network_player_name)
    v = Viewer(m)
    c = Controller(m, v.view)
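# Example invocations built from the argument definitions above (the script
# name "battleship.py" is an assumption):
#
#   python battleship.py -u alice              # play versus your computer
#   python battleship.py -u alice -n bob       # play versus "bob" on the network
#   python battleship.py -u alice -n None      # play versus a random opponent
#   python battleship.py -u alice -c -s        # computer plays, display disabled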
def __init__(self, runInfoDict):
    """
      Constructor
      @ In, runInfoDict, dict, the dictionary containing the runInfo (read in the XML input file)
      @ Out, None
    """
    Model.__init__(self, runInfoDict)
    self.inputCheckInfo = []    # list of tuples of input-object info, i.e. [('name', 'type')]
    self.action = None          # action
    self.workingDir = ''        # path of the working directory
    self.printTag = 'POSTPROCESSOR MODEL'
    self.outputDataset = False  # True if the user wants to dump the outputs to a dataset
    self.validDataType = ['PointSet', 'HistorySet']  # the list of accepted types of DataObject
    ## Currently, we have used both DataObject.addRealization and DataObject.load to
    ## collect the PostProcessor returned outputs. DataObject.addRealization is used to
    ## collect a single realization, while DataObject.load is used to collect multiple
    ## realizations. However, DataObject.load can not be directly used to collect a
    ## single realization. One possible solution is for all postprocessors to return a
    ## list of realizations, so we only use the addRealization method to add the
    ## collections into the DataObjects.
    self.outputMultipleRealizations = False
def count(self, sequence1, sequence2):
    dec1 = FrequenceAnalysis.FrequenceAnalysis(sequence1)
    dec2 = FrequenceAnalysis.FrequenceAnalysis(sequence2)
    # probability that no substitution occurs
    #sites_count = len(min([dec1, dec2], key=len))
    sites_count = len(dec1)
    different_sites = 0
    for e1, e2 in zip(dec1, dec2):
        different_sites += (1 - Model.dotProduct(e1, e2))
    #return (-float(3)/4) * log1p(-(float(4)/3) * different_sites / sites_count)
    return different_sites / sites_count
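# A small self-contained sketch of the commented-out correction above: the
# p-distance returned by count() maps to the Jukes-Cantor distance
# d = -(3/4) * ln(1 - (4/3) * p), which grows faster than p as sites saturate.
from math import log1p

def jukes_cantor(p):
    """Jukes-Cantor correction of a per-site difference proportion p."""
    return (-3.0 / 4.0) * log1p(-(4.0 / 3.0) * p)

# For example, a raw proportion of 0.3 corrects to roughly 0.383:
# jukes_cantor(0.3) ≈ 0.3831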
def main():
    # changeable
    print("Initialize")
    pathData = "data/Porter.txt"
    pathID = "data/ID/ID_Porter.txt"
    pathQuery = "data/Query/query.txt"
    uniqueTerm = 1341890  # not contained in the file
    isStemming = True

    # declare model
    MD = Model(pathData=pathData,
               pathID=pathID,
               pathQuery=pathQuery,
               uniqueT=uniqueTerm,
               isStemming=isStemming)

    print("VectorSpace Start")
    VSFile = open("output/Porter/Porter_VSFile.txt", "wt")
    MD.printVectorSpace(VSFile, k1=2, b=0.75)
    VSFile.close()
    print("VectorSpace End")
    print("")

    print("Laplace Start")
    LaplaceFile = open("output/Porter/Porter_LaplaceFile.txt", "wt")
    MD.printLanguageModelLaplace(LaplaceFile)
    LaplaceFile.close()
    print("Laplace End")
    print("")

    print("JM Start")
    JMFile = open("output/Porter/Porter_JMFile.txt", "wt")
    MD.printLanguageModelJM(JMFile, 0.2)
    JMFile.close()
    print("JM End")
def main_loop(path):
    env = gym.make('SpaceInvaders-v0')
    model = Model()
    model.load_variables(path)
    print(model.weights[0], "Loaded Weights")
    with tf.Session() as session:
        session.run(tf.global_variables_initializer())
        max_episodes = 100000
        env.reset()
        Q = None
        prev_obs = None
        curr_obs, curr_reward, done, info = env.step(0)
        curr_obs = process_observations(curr_obs, prev_obs)
        for eps in range(max_episodes):
            prob = model.forward_pass(session,
                                      curr_obs.reshape([1, 185, 120, 1]))
            action = choose_action(prob)
            curr_obs, curr_reward, done, info = env.step(action)
            curr_obs = process_observations(curr_obs, prev_obs)
            prev_obs = curr_obs
            env.render()
            if done is True:
                done = False
                env.reset()
def main_loop():
    env = gym.make('SpaceInvaders-v0')
    with tf.Session() as session:
        model = Model()
        done = False
        alpha = 1e-3
        gamma = 0.99
        target = None
        episode = 0
        curr_obs = env.reset()
        prev_obs = None
        net_reward = 0
        curr_reward = 0
        memory = []
        death_count = 0
        action_to_prob = [3, 2, 1, 1, 0]
        num_actions = 4
        max_episodes = 50000
        threshhold = 100
        Q = None
        for eps in range(max_episodes):
            if Q is None:
                epsilon_prob = [random.uniform(0, 1)
                                for _ in range(num_actions)]
                action = choose_action(epsilon_prob)
            elif eps % 57 == 0:  # arbitrary exploration interval
                epsilon_prob = [random.uniform(0, 1)
                                for _ in range(num_actions)]
                action = choose_action(epsilon_prob)
            else:
                prob = model.forward_pass(session,
                                          curr_obs.reshape([1, 185, 120, 1]))
                action = choose_action(prob)
            curr_obs, curr_reward, done, info = env.step(action)
            curr_obs = process_observations(curr_obs, prev_obs)
            memory.append((prev_obs, action, curr_reward, curr_obs, eps))
            # cap replay memory at 100 entries by evicting random items
            while len(memory) > 100:
                rand_death_i = random.randint(0, len(memory) - 1)
                del memory[rand_death_i]
            if eps % threshhold == 0:
                print("Episode", eps)
            rand_i = random.randint(0, len(memory) - 1)
            mem_sample = memory[rand_i]
            if Q is None:
                target = mem_sample[2]
                Q = np.random.random_sample((max_episodes, num_actions))
            else:
                target = mem_sample[2] + gamma * Q[mem_sample[-1],
                                                   max_Q(Q, mem_sample[-1])]
            #print(target, "TARGET")
            model.train(session,
                        training_data=mem_sample[-2],
                        labels=Q[eps],
                        target=target)
            Q[eps, action_to_prob[action]] += alpha * (
                target - Q[eps, action_to_prob[action]])
            env.render()
            prev_obs = curr_obs
            if info['ale.lives'] == 0:
                death_count = death_count + 1
                done = False
                env.reset()
        print("Death count", death_count)
        print(model.save_variables(session))
    im6 = im4
    return im6

Test = np.zeros((ntest, imsize, imsize, nchannels))
for isample in range(ntest):
    path = '%s/I%05d.png' % (impath, isample + 1)
    im = misc.imread(path).astype(float) / 255
    Test[isample, :, :, 0] = im

# --------------------------------------------------
print('model')
# --------------------------------------------------

from Models import Model
model = Model(nchannels, imcropsize)

# --------------------------------------------------
print('recover parameters')
# --------------------------------------------------

saver = tf.train.Saver()
sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))  # config parameter needed to save variables when using GPU
saver.restore(sess, modelpath)
print("Model restored.")

# --------------------------------------------------
print('embedding')
# --------------------------------------------------
def adaptive_abc_smc(n_obs: int,
                     y_obs: [[float]],
                     fitting_model: Models.Model,
                     priors: ["stats.Distribution"],
                     max_steps: int,
                     sample_size: int,
                     acceptance_kernel: "function",
                     alpha: float,
                     initial_scaling_factor=maxsize,
                     terminal_scaling_factor=0,
                     max_simulations=None,
                     summary_stats=None,
                     distance_measure=l2_norm,
                     show_plots=True,
                     printing=True) -> (Models.Model, [[float]], float):
    """
    DESCRIPTION
    Fully adaptive Sequential Monte-Carlo Sampling version of Approximate
    Bayesian Computation for the generative models defined in `Models.py`.
    (Adaptive wrt perturbance kernel and bandwidths.)

    PARAMETERS
    n_obs (int) - Number of observations available.
    y_obs ([[float]]) - Observations from true model.
    fitting_model (Model) - Model the algorithm will aim to fit to observations.
    priors (["stats.Distribution"]) - Priors for the value of parameters of `fitting_model`.
    max_steps (int) - Maximum number of resampling iterations (algorithm terminates if it reaches this value).
    sample_size (int) - Number of parameter samples to keep per step.
    acceptance_kernel (function) - Function to determine whether to accept parameters.
    alpha (float) - Analogous to the proportion of accepted samples to carry to the next step.
        Used to determine acceptance kernel bandwidths. MUST be in (0,1).

    OPTIONAL PARAMETERS
    initial_scaling_factor (float in (0,1)) - Bandwidth the acceptance kernel begins at (default=maxsize).
    terminal_scaling_factor (float in (0,1)) - Value of bandwidth at which to terminate the algorithm (default=0).
    max_simulations (int) - Maximum number of simulations (default=None=no limit).
        Only checked at the end of each iteration.
    summary_stats ([function]) - Functions which summarise `y_obs` and the observations
        of `fitting_model` in some way (default=group by dimension).
    distance_measure (func) - Distance function to use (see choices above).
    show_plots (bool) - Whether to generate and show plots (default=True).
    printing (bool) - Whether to print updates to terminal (default=True).

    RETURNS
    Model - Fitted model with best parameters.
    [[float]] - Set of all accepted parameter values (use for further investigation).
    [float] - Weights of the accepted parameter values.
    """
    if (alpha <= 0) or (alpha >= 1):
        raise ValueError("`alpha` must be in (0,1)")

    group_dim = lambda ys, i: [y[i] for y in ys]
    summary_stats = summary_stats if (summary_stats) else (
        [(lambda ys: group_dim(ys, i)) for i in range(len(y_obs[0]))])
    s_obs = [s(y_obs) for s in summary_stats]

    scaling_factor = initial_scaling_factor

    # initial sampling
    THETAS = []  # (weight, params)
    distances = []
    i = 0
    while (len(THETAS) < sample_size):
        i += 1
        theta_temp = [pi_i.rvs(1)[0] for pi_i in priors]

        # observe theorised model
        fitting_model.update_params(theta_temp)
        y_temp = fitting_model.observe()
        s_temp = [s(y_temp) for s in summary_stats]

        # accept-reject
        norm_vals = [distance_measure(s_temp_i, s_obs_i)
                     for (s_temp_i, s_obs_i) in zip(s_temp, s_obs)]
        if (acceptance_kernel(l1_norm(norm_vals), scaling_factor)):
            distances.append((1 / sample_size, l1_norm(norm_vals)))
            THETAS.append((1 / sample_size, theta_temp))
        if (printing):
            print("({:,}) - {:,}/{:,}".format(i, len(THETAS), sample_size),
                  end="\r")
    if (printing):
        print()
    total_simulations = i

    # resampling & reweighting step
    t = 0
    while (t < max_steps and scaling_factor > terminal_scaling_factor):
        if (not printing):
            print("*", sep="", end="")
        if (max_simulations and total_simulations >= max_simulations):
            break
        elif (printing):
            print("Total Sims = {:,} < {:,}\n".format(total_simulations,
                                                      max_simulations))
        i = 0
        NEW_THETAS = []  # (weight, params)

        perturbance_kernels, perturbance_kernel_probability = \
            __generate_smc_perturbance_kernels([x[1] for x in THETAS], printing)
        scaling_factor = __calculate_scaling_factor(distances,
                                                    acceptance_kernel, alpha)
        distances = []

        while (len(NEW_THETAS) < sample_size):
            i += 1
            if (printing):
                print("({:,}/{:,} - {:,}) - {:,}/{:,} (eps={:,.3f}>{:,.3f})".format(
                    t, max_steps, i, len(NEW_THETAS), sample_size,
                    scaling_factor, terminal_scaling_factor),
                    end="\r", flush=True)

            # sample from THETAS
            new_i = np.random.choice([i for i in range(len(THETAS))],
                                     size=1,
                                     p=[weight for (weight, _) in THETAS])[0]
            theta_t = THETAS[new_i][1]

            # perturb sample until it has non-zero prior density
            theta_temp = [k(theta_i) for (k, theta_i)
                          in zip(perturbance_kernels, theta_t)]
            while any([p.pdf(theta) == 0.0
                       for (p, theta) in zip(priors, theta_temp)]):
                theta_temp = [k(theta_i) for (k, theta_i)
                              in zip(perturbance_kernels, theta_t)]

            # observe theorised model
            fitting_model.update_params(theta_temp)
            y_temp = fitting_model.observe()
            s_temp = [s(y_temp) for s in summary_stats]

            # accept-reject
            norm_vals = [distance_measure(s_temp_i, s_obs_i)
                         for (s_temp_i, s_obs_i) in zip(s_temp, s_obs)]
            if (acceptance_kernel(l1_norm(norm_vals), scaling_factor)):
                weight_numerator = sum([p.pdf(theta) for (p, theta)
                                        in zip(priors, theta_temp)])
                weight_denominator = 0
                for (weight, theta) in THETAS:
                    # probability theta_temp was sampled
                    weight_denominator += sum([
                        weight * p(theta_i, theta_temp_i)
                        for (p, theta_i, theta_temp_i) in zip(
                            perturbance_kernel_probability, theta, theta_temp)])
                weight = weight_numerator / weight_denominator
                NEW_THETAS.append((weight, theta_temp))
                distances.append((weight, l1_norm(norm_vals)))

        total_simulations += i

        weight_sum = sum([w for (w, _) in NEW_THETAS])
        THETAS = [(w / weight_sum, theta) for (w, theta) in NEW_THETAS]
        distances = [(w / weight_sum, d) for (w, d) in distances]

        if (printing):
            print()
        t += 1
    if (printing):
        print()

    param_values = [theta for (_, theta) in THETAS]
    weights = [w for (w, _) in THETAS]
    theta_hat = list(np.average(param_values, axis=0, weights=weights))
    model_hat = fitting_model.copy(theta_hat)

    if (printing):
        print("Total Simulations - {:,}".format(total_simulations))
        print("theta_hat -", theta_hat)

    if (show_plots):
        fig = plt.figure(constrained_layout=True)
        fig = __abc_smc_plotting(fig, y_obs, priors, fitting_model, model_hat,
                                 accepted_params=param_values, weights=weights)
        plt.show()

    # n_rows = max([1, np.lcm(fitting_model.n_params, fitting_model.dim_obs)])
    #
    # fig = plt.figure(constrained_layout=True)
    # gs = fig.add_gridspec(n_rows, 2)
    #
    # # plot fitted model
    # row_step = n_rows // fitting_model.dim_obs
    # for i in range(fitting_model.dim_obs):
    #     ax = fig.add_subplot(gs[i*row_step:(i+1)*row_step, -1])
    #     y_obs_dim = [y[i] for y in y_obs]
    #     Plotting.plot_accepted_observations(ax, fitting_model.x_obs, y_obs_dim, [], model_hat, dim=i)
    #
    # row_step = n_rows // fitting_model.n_params
    # for i in range(fitting_model.n_params):
    #     ax = fig.add_subplot(gs[i*row_step:(i+1)*row_step, 0])
    #     name = "theta_{}".format(i)
    #     parameter_values = [theta[i] for theta in param_values]
    #     Plotting.plot_smc_posterior(ax, name, parameter_values=parameter_values, weights=weights, predicted_val=theta_hat[i], prior=priors[i], dim=i)
    #
    # plt.show()

    return model_hat, param_values, weights
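# A hedged call sketch for adaptive_abc_smc(), assuming a generative model from
# Models.py, scipy.stats priors, and an acceptance kernel from this module
# (the names below are placeholders, not a confirmed API):
#
#   import scipy.stats as stats
#   priors = [stats.uniform(0, 10)]           # one prior per model parameter
#   model_hat, params, weights = adaptive_abc_smc(
#       n_obs=100, y_obs=y_obs, fitting_model=my_model, priors=priors,
#       max_steps=20, sample_size=200, acceptance_kernel=uniform_kernel,
#       alpha=0.5, show_plots=False)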
np.random.seed(50)

N = 40
h = 0.5
eps_0 = 5e-4
T_inf = 50
z = np.linspace(0, 1, N + 1)

M = 20

beta_prior = np.ones((N - 1,))
sigma_prior = 1e-1
C_beta = np.diag(sigma_prior**2 * np.ones((N - 1,)))

model = Model(N, eps_0, h, T_inf)
prior = Prior(beta_prior, C_beta)
data = GenData(M, h, T_inf)
data.gen_data(2, 'vector', scalar_noise=0.02)
objfn = ObjectiveFn(z, data, model, prior)

data_true = GenData(N, h, T_inf)
T_true, _ = data_true.get_T_true()
beta_test = model.get_beta_true(T_true)

G_adjoint = objfn.compute_gradient_adjoint(beta_test)
G_direct = objfn.compute_gradient_direct(beta_test)
G_adjoint_cont = objfn.compute_gradient_adjoint_cont(beta_test)

plt.figure()
def two_step_minimum_entropy(summary_stats: ["function"],
                             n_obs: int,
                             y_obs: [[float]],
                             fitting_model: Models.Model,
                             priors: ["stats.Distribution"],
                             min_subset_size=1,
                             max_subset_size=None,
                             n_samples=1000,
                             n_accept=100,
                             n_keep=10,
                             k=4,
                             printing=False) -> ([int], [[float]]):
    """
    OPTIONAL PARAMETERS
    n_keep (int) - Number of (best) accepted samples to keep from the set of
        stats which minimise entropy (`best_stats`) and use for evaluating
        the second stage (default=10).
    """
    n_stats = len(summary_stats)
    max_subset_size = max_subset_size if (max_subset_size) else n_stats

    # find summary stats which minimise entropy
    me_stats_id, accepted_theta = minimum_entropy(summary_stats,
                                                  n_obs,
                                                  y_obs,
                                                  fitting_model,
                                                  priors,
                                                  min_subset_size=min_subset_size,
                                                  max_subset_size=max_subset_size,
                                                  n_samples=n_samples,
                                                  n_accept=n_accept,
                                                  k=k,
                                                  printing=printing)
    me_stats = [summary_stats[i] for i in me_stats_id]
    s_obs = [s(y_obs) for s in me_stats]
    if (printing):
        print("ME stats found -", me_stats_id, "\n")

    # identify the `n_keep` best sets of parameters
    theta_scores = []
    for (i, theta) in enumerate(accepted_theta):
        fitting_model.update_params(theta)
        y_t = fitting_model.observe()
        s_t = [s(y_t) for s in me_stats]
        weight = ABC.l1_norm([ABC.l2_norm(s_t_i, s_obs_i)
                              for (s_t_i, s_obs_i) in zip(s_t, s_obs)])
        theta_scores.append((weight, i))
    theta_scores.sort(key=lambda x: x[0])
    me_theta = [accepted_theta[x[1]] for x in theta_scores[:n_keep]]
    if (printing):
        print("ME theta found.\n")

    # all permutations of summary stats
    n_stats = len(summary_stats)
    perms = []
    for n in range(min_subset_size, max_subset_size + 1):
        perms += [x for x in combinations([i for i in range(n_stats)], n)]

    lowest = ([], maxsize, [])

    # compare subsets of summary stats
    sampling_details = {"sampling_method": "best",
                        "num_runs": n_samples,
                        "sample_size": n_accept,
                        "distance_measure": ABC.log_l2_norm}
    for (i, perm) in enumerate(perms):
        if (printing):
            print("Permutation = ", perm, sep="")
        else:
            print("{}/{}".format(i, len(perms)), end="\r")
        ss = [summary_stats[i] for i in perm]
        _, accepted_theta = ABC.abc_rejection(n_obs,
                                              y_obs,
                                              fitting_model,
                                              priors,
                                              sampling_details,
                                              summary_stats=ss,
                                              show_plots=False,
                                              printing=printing)
        rsses = [__rsse(accepted_theta, theta) for theta in me_theta]
        mrsse = np.mean(rsses)
        if (printing):
            print("MRSSE of ", perm, "= {:,.2f}\n".format(mrsse), sep="")
        if (mrsse < lowest[1]):
            lowest = (perm, mrsse, accepted_theta)

    return lowest[0], lowest[2]
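# A hedged call sketch for the two-step selection above; `stats_pool` and
# `my_model` are placeholders for a list of candidate summary statistics and
# a Models.py generative model:
#
#   best_ids, posterior_samples = two_step_minimum_entropy(
#       stats_pool, n_obs=100, y_obs=y_obs, fitting_model=my_model,
#       priors=priors, n_samples=1000, n_accept=100, n_keep=10)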
else:
    w2v_size = 200

hidden_layer_dims = [int(h) for h in sys.argv[8].split(" ")]
hidden_layer_activations = sys.argv[9].split(" ")
hidden_layer_dropouts = [float(d) for d in sys.argv[10].split(" ")]
window_size = int(sys.argv[11])
num_epochs = int(sys.argv[12])
loss_function = sys.argv[13]
optimizer = sys.argv[14]
if len(sys.argv) == 16:
    weights_location = sys.argv[15]
else:
    weights_location = None

# build model
model = m.FF_keras(hidden_layer_dims=hidden_layer_dims,
                   activations=hidden_layer_activations,
                   embeddingClass=None,
                   w2vDimension=w2v_size,
                   window_size=window_size,
                   hidden_dropouts=hidden_layer_dropouts,
                   loss_function=loss_function,
                   optimizer=optimizer,
                   num_epochs=num_epochs)
model.buildModel()

print("loading data")
model.loadData(training_vectors, training_labels, testing_vectors,
               testing_labels, number_training_points)

print("training")
model.train(None, 0, neg_sample,
            save_data=True,
            f_vec="training_instances/ff-Giga/training_X",
            f_lab="training_instances/ff-Giga/training_y")

print("testing")
model.test(None, 0,
           save_data=True,
           f_vec="training_instances/ff-Giga/testing_X",
           f_lab="training_instances/ff-Giga/testing_y")
print("size of testing data", model.testing_X.shape)

# save weights?
def setUp(self):
    self.model = Model()
iterator = tf.data.Iterator.from_structure(tr_data.output_types,
                                           tr_data.output_shapes)
next_element = iterator.get_next()
tr_init_op = iterator.make_initializer(tr_data)

im1, im2, im3 = tf.split(next_element, 3, 3)
triplet_batch = tf.tuple((im1, im2, im3))

# --------------------------------------------------
print('model')
# --------------------------------------------------

from Models import Model
model = Model(nchannels, imcropsize, testIdx)
print('reslearn: ', model.residualLearning)

# --------------------------------------------------
print('train')
# --------------------------------------------------

saver = tf.train.Saver()
sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))  # config parameter needed to save variables when using GPU

if os.path.exists(trainlogpath):
    shutil.rmtree(trainlogpath)
if os.path.exists(validlogpath):
    shutil.rmtree(validlogpath)
train_writer = tf.summary.FileWriter(trainlogpath, sess.graph)
def joyce_marjoram(summary_stats: ["function"],
                   n_obs: int,
                   y_obs: [[float]],
                   fitting_model: Models.Model,
                   priors: ["stats.Distribution"],
                   param_bounds: [(float, float)],
                   distance_measure=ABC.l2_norm,
                   KERNEL=ABC.uniform_kernel,
                   BANDWIDTH=1,
                   n_samples=10000,
                   n_bins=10,
                   printing=True) -> [int]:
    """
    DESCRIPTION
    Use the algorithm in Paul Joyce, Paul Marjoram 2008 to find an
    approximately sufficient set of summary statistics (from the set
    `summary_stats`).

    PARAMETERS
    summary_stats ([function]) - Functions which summarise `y_obs` and the
        observations of `fitting_model` in some way. These are what will be evaluated.
    n_obs (int) - Number of observations available.
    y_obs ([[float]]) - Observations from true model.
    fitting_model (Model) - Model the algorithm will aim to fit to observations.
    priors (["stats.Distribution"]) - Priors for the value of parameters of `fitting_model`.
    param_bounds ([(float,float)]) - The bounds of the priors used to generate parameter sets.
    KERNEL (func) - One of the kernels defined above. Determines which parameters are good or not.
    BANDWIDTH (float) - Scale parameter for `KERNEL`.
    n_samples (int) - Number of samples to make.
    n_bins (int) - Number of bins to discretise each dimension of the posterior into (default=10).

    RETURNS
    [int] - Indexes of selected summary stats in `summary_stats`.
    """
    if (type(y_obs) != list):
        raise TypeError("`y_obs` must be a list (not {})".format(type(y_obs)))
    if (len(y_obs) != n_obs):
        raise ValueError(
            "Wrong number of observations supplied (len(y_obs)!=n_obs) ({}!={})".format(
                len(y_obs), n_obs))
    if (len(priors) != fitting_model.n_params):
        raise ValueError(
            "Wrong number of priors given (exp fitting_model.n_params={})".format(
                fitting_model.n_params))

    group_dim = lambda ys, i: [y[i] for y in ys]
    summary_stats = summary_stats if (summary_stats) else (
        [(lambda ys: group_dim(ys, i)) for i in range(len(y_obs[0]))])
    s_obs = [s(y_obs) for s in summary_stats]

    # generate samples
    SAMPLES = []  # (theta, s_vals)
    for i in range(n_samples):
        if (printing):
            print("{:,}/{:,}".format(i + 1, n_samples), end="\r")

        # sample parameters
        theta_t = [pi_i.rvs(1)[0] for pi_i in priors]

        # observe theorised model
        fitting_model.update_params(theta_t)
        y_t = fitting_model.observe()
        s_t = [s(y_t) for s in summary_stats]

        SAMPLES.append((theta_t, s_t))
    if (printing):
        print()
        for i in range(len(summary_stats)):
            print("var_{}={:,.3f}".format(i, np.var([x[1][i] for x in SAMPLES])))

    # consider adding each summary stat in turn
    ACCEPTED_SUMMARY_STATS_ID = []  # indexes of accepted summary stats
    id_to_try = randint(0, len(summary_stats) - 1)
    ACCEPTED_SUMMARY_STATS_ID = [id_to_try]
    tried = []

    while True:
        if (printing):
            print("Currently accepted - ", ACCEPTED_SUMMARY_STATS_ID)

        # samples using currently accepted summary stats
        samples_curr = [(theta, [s[j] for j in ACCEPTED_SUMMARY_STATS_ID])
                        for (theta, s) in SAMPLES]
        s_obs_curr = [s_obs[j] for j in ACCEPTED_SUMMARY_STATS_ID]
        accepted_params_curr = []
        for (theta_t, s_t) in samples_curr:
            norm_vals = [distance_measure(s_t_i, s_obs_i)
                         for (s_t_i, s_obs_i) in zip(s_t, s_obs_curr)]
            # NOTE - ABC.l1_norm() can be replaced by any other norm
            if (KERNEL(ABC.l1_norm(norm_vals), BANDWIDTH)):
                accepted_params_curr.append(theta_t)

        # choose the next summary stat to try
        available_ss = [x for x in range(len(summary_stats) - len(tried))
                        if (x not in ACCEPTED_SUMMARY_STATS_ID) and (x not in tried)]
        if (len(available_ss) == 0):
            return ACCEPTED_SUMMARY_STATS_ID
        id_to_try = available_ss[randint(0, len(available_ss) - 1)]
        tried += [id_to_try]
        if (printing):
            print("Trying to add {} to [{}]".format(
                id_to_try,
                ",".join([str(x) for x in ACCEPTED_SUMMARY_STATS_ID])))

        # samples using currently accepted summary stats plus id_to_try
        samples_prop = [(theta, [s[j] for j in ACCEPTED_SUMMARY_STATS_ID + [id_to_try]])
                        for (theta, s) in SAMPLES]
        s_obs_prop = [s_obs[j] for j in ACCEPTED_SUMMARY_STATS_ID + [id_to_try]]
        accepted_params_prop = []
        for (theta_t, s_t) in samples_prop:
            norm_vals = [distance_measure(s_t_i, s_obs_i)
                         for (s_t_i, s_obs_i) in zip(s_t, s_obs_prop)]
            # NOTE - ABC.l1_norm() can be replaced by any other norm
            if (KERNEL(ABC.l1_norm(norm_vals), BANDWIDTH)):
                accepted_params_prop.append(theta_t)

        if (printing):
            print("N_(k-1)={:,}".format(len(accepted_params_curr)))
            print("N_k    ={:,}".format(len(accepted_params_prop)))

        if (__compare_summary_stats(accepted_params_curr,
                                    accepted_params_prop,
                                    param_bounds,
                                    n_params=len(priors),
                                    n_bins=10)):
            # add id_to_try
            ACCEPTED_SUMMARY_STATS_ID += [id_to_try]
            if (printing):
                print("Accepting {}.\nCurrently accepted - ".format(id_to_try),
                      ACCEPTED_SUMMARY_STATS_ID)

            # consider removing previously accepted summaries
            if (printing):
                print("\nConsider removing previous summaries")
            for i in range(len(ACCEPTED_SUMMARY_STATS_ID) - 2, -1, -1):
                ids_minus = [x for (j, x) in enumerate(ACCEPTED_SUMMARY_STATS_ID)
                             if j != i]
                if (printing):
                    print("Comparing [{}] to [{}]".format(
                        ",".join([str(x) for x in ACCEPTED_SUMMARY_STATS_ID]),
                        ",".join([str(x) for x in ids_minus])))

                # samples using the reduced set
                samples_minus = [(theta, [s[j] for j in ids_minus])
                                 for (theta, s) in SAMPLES]
                s_obs_minus = [s_obs[j] for j in ids_minus]
                accepted_params_minus = []
                for (theta_t, s_t) in samples_minus:
                    norm_vals = [distance_measure(s_t_i, s_obs_i)
                                 for (s_t_i, s_obs_i) in zip(s_t, s_obs_minus)]
                    # NOTE - ABC.l1_norm() can be replaced by any other norm
                    if (KERNEL(ABC.l1_norm(norm_vals), BANDWIDTH)):
                        accepted_params_minus.append(theta_t)

                if (__compare_summary_stats(accepted_params_prop,
                                            accepted_params_minus,
                                            param_bounds,
                                            n_params=len(priors),
                                            n_bins=10)):
                    if (printing):
                        print("Removing - ", ACCEPTED_SUMMARY_STATS_ID[i])
                    ACCEPTED_SUMMARY_STATS_ID = ids_minus
                    if (printing):
                        print("Reduced to - ", ACCEPTED_SUMMARY_STATS_ID)
        if (printing):
            print()

    return ACCEPTED_SUMMARY_STATS_ID
def abc_semi_auto(n_obs: int,
                  y_obs: [[float]],
                  fitting_model: Models.Model,
                  priors: ["stats.Distribution"],
                  distance_measure=ABC.l2_norm,
                  n_pilot_samples=10000,
                  n_pilot_acc=1000,
                  n_params_sample_size=100,
                  summary_stats=None,
                  printing=True) -> (["function"], [[float]]):
    group_dim = lambda ys, i: [y[i] for y in ys]
    summary_stats = summary_stats if (summary_stats) else (
        [(lambda ys: group_dim(ys, i)) for i in range(len(y_obs[0]))])

    sampling_details = {"sampling_method": "best",
                        "num_runs": n_pilot_samples,
                        "sample_size": n_pilot_acc,
                        "distance_measure": distance_measure,
                        "params_sample_size": n_params_sample_size}

    # perform pilot run
    _, pilot_params = ABC.abc_rejection(n_obs=n_obs,
                                        y_obs=y_obs,
                                        fitting_model=fitting_model,
                                        priors=priors,
                                        sampling_details=sampling_details,
                                        summary_stats=summary_stats,
                                        show_plots=False,
                                        printing=printing)

    # calculate distribution of accepted params
    new_priors = []
    for i in range(fitting_model.n_params):
        pilot_params_dim = [x[i] for x in pilot_params]
        dist = stats.gaussian_kde(pilot_params_dim)
        new_priors.append(dist)
    if (printing):
        print("Calculated posteriors from pilot.")

    # sample new parameters and simulate model
    m = sampling_details["params_sample_size"] \
        if ("params_sample_size" in sampling_details) else 1000
    samples = []
    for i in range(m):
        if (printing):
            print("{}/{}".format(i, m), end="\r")
        theta_t = [list(p.resample(1))[0][0] for p in new_priors]

        # observe theorised model
        fitting_model.update_params(theta_t)
        y_t = fitting_model.observe()
        s_t = [s(y_t) for s in summary_stats]

        samples.append((theta_t, s_t))
    if (printing):
        print("Generated {} parameter sets.".format(m))

    # create summary stats
    # NOTE - other methods can be used
    new_summary_stats = []
    X = [list(np.ravel(np.matrix(x[1]))) for x in samples]  # flatten output data
    X = np.array(X)

    coefs = []
    for i in range(fitting_model.n_params):
        y = np.array([x[0][i] for x in samples])
        reg = LinearRegression().fit(X, y)
        coefs.append(list(reg.coef_))

    new_summary_stats = [lambda xs: list(np.dot(coefs, np.ravel(np.matrix(xs))))]
    s_t = [s(samples[0][1]) for s in new_summary_stats]

    if (printing):
        print("Generated summary statistics.")

    return new_summary_stats, coefs
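# The regression step in abc_semi_auto() follows the semi-automatic ABC idea
# (Fearnhead & Prangle style): fit one linear model per parameter so that
# s(y) = B @ vec(y) approximates E[theta | y]. A minimal self-contained sketch
# of that construction with synthetic data (all names here are illustrative):
import numpy as np
from sklearn.linear_model import LinearRegression

rng = np.random.default_rng(0)
theta = rng.uniform(0, 1, size=500)                           # sampled parameters
y_sim = theta[:, None] + 0.1 * rng.standard_normal((500, 3))  # simulated outputs
reg = LinearRegression().fit(y_sim, theta)                    # regress theta on outputs
summary = lambda ys: float(np.dot(reg.coef_, np.ravel(ys)))   # learned statistic
# summary(y_sim[0]) now acts as a near-sufficient scalar statistic for theta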
C_m_type = 'matrix'

N = 32
h = 0.5
eps_0 = 5e-4
T_inf = 50 * np.ones((N - 1,))
z = np.linspace(0, 1, N + 1)

M = 32
T_inf_data = 50 * np.ones((M - 1,))

beta_prior = np.ones((N - 1,))
sigma_prior = 0.8
C_beta = np.diag(sigma_prior**2 * np.ones((N - 1,)))

model = Model(N, eps_0, h, T_inf)
prior = Prior(beta_prior, C_beta)
data = GenData(M, h, T_inf_data)
data.gen_data(100, C_m_type=C_m_type, scalar_noise=0.02)
objfn = ObjectiveFn(z, data, model, prior)

beta0 = np.ones(N - 1)
optimizer = 'CG'
opt = Optimization(model, prior, data, objfn, beta0, optimizer)
opt.compute_MAP_properties()
opt.compute_base_properties()
opt.sample_posterior(int(1e6))
opt.compute_true_properties()
(T_MAP_lst, beta_r_MAP_lst, beta_c_MAP_lst,
 std_lst, C_m_type) = pickle.load(open('../data/fig3%s.p' % C_m_type, 'rb'))

f = plt.figure(figsize=(14, 4))
ax1 = f.add_subplot(131)
ax2 = f.add_subplot(132)
ax3 = f.add_subplot(133)

for i, T_inf in enumerate(range(5, 51, 5)):
    data = GenData(M, h, T_inf)
    T_true, _ = data.get_T_true(with_rand=False)
    model = Model(N, eps_0, h, T_inf)
    beta_r_true = model.get_beta_r_true(T_true, with_rand=False)
    ax1.plot(T_true, beta_r_true, 'k', label='True' if i == 0 else '')

    T_MAP = T_MAP_lst[i]
    beta_r_MAP = beta_r_MAP_lst[i]
    std = std_lst[i]
    ax1.errorbar(T_MAP, beta_r_MAP, yerr=std, marker='o', fillstyle='none',
                 linestyle='none', mec='r', ecolor='0.5', ms=4, capsize=3,
                 label='MAP' if i == 0 else '')

ax1.set_xlabel(r'$T$')
ax1.set_ylabel(r'$\beta_r$')
if C_m_type == 'matrix':
    ax1.axis([0, 50, 0.2, 1.8])
h = 0.5
eps_0 = 5e-4
T_inf = 25
z = np.linspace(0, 1, N + 1)

# parameters for GenData
M = 32

# parameters for Prior
beta_prior = np.ones((N - 1,))
sigma_prior = 1
C_beta = np.diag(sigma_prior**2 * np.ones((N - 1,)))

# start
model = Model(N, eps_0, h, T_inf)
prior = Prior(beta_prior, C_beta)
data = GenData(M, h, T_inf)

# draw prior samples via the Cholesky factor of the prior covariance
R = np.linalg.cholesky(C_beta)
N_samples = int(1e3)
beta_samp_lst = []
T_samp_lst = []
for i in range(N_samples):
    s = np.random.randn(N - 1)
    beta_sample = beta_prior + R.dot(s)
    T_sample, _ = model.get_T_mult(beta_sample)
        print(' h1:')
        scores, evaluated_params = evaluate_params(model_type,
                                                   h1_train_x,
                                                   h1_train_y,
                                                   tuning_x,
                                                   tuning_y,
                                                   metrics[0],
                                                   params,
                                                   get_autc=autotune_autc,
                                                   verbose=verbose)
        # scores_h1.append(scores)
        if params_list is None:
            params_list = evaluated_params
        h1 = Model(model_type, 'h1', params=params_list[np.argmax(scores)])
    else:
        h1 = Model(model_type, 'h1', params=chosen_params)
    h1.fit(h1_train_x, h1_train_y)

    user_count = 0
    # todo: USER LOOP
    for user_id, item in hists_inner_seed_by_user.items():
        hist_train, hist_valid, hist_test_x, hist_test_y = item
        if chrono_split:
            hist_train_range = np.zeros(len(h2_train))
            start_idx, end_idx = hist_train_ranges[user_id][0]
            hist_train_range[start_idx:end_idx] = 1
        else:
imcropsize = 128  # should be the same as in train.py
nchannels = 1     # should be the same as in train.py

modelpath = '/home/mc457/Workspace/ImageForensics/TrainedModel/model.ckpt'
impath = '/home/mc457/Images/ImageForensics/SynthExamples/Test'
outfigpath = '/home/mc457/Workspace/ImageForensics/AcrcTest.png'

nruns = 1
imtestoutpath = '/home/mc457/Workspace/ImageForensics/TestSynth'  # images saved only when nruns == 1

thr = 0.5

# --------------------------------------------------
print('load model and parameters')
# --------------------------------------------------

from Models import Model
model = Model(nchannels, imcropsize, 1)

saver = tf.train.Saver()
sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))  # config parameter needed to save variables when using GPU
saver.restore(sess, modelpath)
print("Model restored.")

# --------------------------------------------------
print('test')
# --------------------------------------------------

classes = os.listdir(impath)
impaths = []
import datetime
# --------------------------------------------------
print('parameters')
# --------------------------------------------------

imcropsize = 128  # should be the same as in train.py
nchannels = 1     # should be the same as in train.py

modelpath = '/home/mc457/Workspace/ImageForensics/TrainedModel/model.ckpt'
impath = '/home/mc457/Images/ImageForensics/SynthExamples/Test'

# --------------------------------------------------
print('load model and parameters')
# --------------------------------------------------

from Models import Model
model = Model(nchannels, imcropsize, 1)

saver = tf.train.Saver()
sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))  # config parameter needed to save variables when using GPU
saver.restore(sess, modelpath)
print("Model restored.")

# --------------------------------------------------
print('test')
# --------------------------------------------------

classes = os.listdir(impath)
classes.sort()
data['Open'].isnull().sum()

data['Volume_(BTC)'].fillna(value=0, inplace=True)
data['Volume_(Currency)'].fillna(value=0, inplace=True)
data['Weighted_Price'].fillna(value=0, inplace=True)
data['Open'].fillna(method='ffill', inplace=True)
data['High'].fillna(method='ffill', inplace=True)
data['Low'].fillna(method='ffill', inplace=True)
data['Close'].fillna(method='ffill', inplace=True)

# standardize the data
df = (data - data.mean()) / data.std()

# lr = Model(df, 0.025).linear_regression()
nn1 = Model(df, 0.025).nn_1_keras()

print('hi')
print('hi')
print('hi')
print('hi')
#sys.argv[4] = max length of sentence to consider (currently using 30);
#              needed to handle the fixed size of the input matrix
#sys.argv[5] = number of layers
#sys.argv[6] = c_size
#sys.argv[7] = number of epochs
#sys.argv[8] = loss function
#sys.argv[9] = optimizer
#sys.argv[10] = pickle file for training data
#sys.argv[11] = pickle file for testing data
#sys.argv[12] = OPTIONAL location of .h5 file to save weights

if "embed" not in sys.argv[3]:
    print("loading embeddings")
    w2v = g.Word2Vec.load_word2vec_format(sys.argv[3], binary=False)
    model = m.LSTM_keras(num_layers=int(sys.argv[5]),
                         embeddingClass=w2v,
                         w2vDimension=int(len(w2v["the"])),
                         max_seq_length=int(sys.argv[4]),
                         cSize=int(sys.argv[6]),
                         num_epochs=int(sys.argv[7]),
                         loss_function=sys.argv[8],
                         optimizer=sys.argv[9])
else:
    embedding = e.Embedding_keras()
    print("calculating vocab size")
    embedding.getVocabSize(sys.argv[1], int(sys.argv[2]))
    print("vocab size", embedding.vocSize)
    print("building embedding layer")
    embedding.build()
    model = m.LSTM_keras(num_layers=int(sys.argv[5]),
                         embeddingLayerClass=embedding,
                         max_seq_length=int(sys.argv[4]),
                         cSize=int(sys.argv[6]),
                         num_epochs=int(sys.argv[7]),
                         loss_function=sys.argv[8],
                         optimizer=sys.argv[9])

print("preparing data file")
# --------------------------------------------------

imcropsize = 128  # should be the same as in train.py
nchannels = 1     # should be the same as in train.py

modelpath = '/home/mc457/Workspace/TFModel/SiamRelease.ckpt'
impath = '/home/mc457/Images/ImageForensics/RealExamplesNoPlots'
outfigpath = '/home/mc457/Workspace/AcrcTest.png'

nruns = 10
imtestoutpath = '/home/mc457/Workspace/Scratch'  # images saved only when nruns == 1

# --------------------------------------------------
print('load model and parameters')
# --------------------------------------------------

from Models import Model
model = Model(nchannels, imcropsize)

saver = tf.train.Saver()
sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))  # config parameter needed to save variables when using GPU
saver.restore(sess, modelpath)
print("Model restored.")

# --------------------------------------------------
print('test')
# --------------------------------------------------

classes = os.listdir(impath)
impaths = []
for c in classes:
embedding.getVocabSize(sys.argv[1], int(sys.argv[2]))
print("vocab size", embedding.vocSize)
print("building embedding layer")
embedding.build()
model = m.LSTM_keras(num_layers=int(sys.argv[5]),
                     embeddingLayerClass=embedding,
                     max_seq_length=int(sys.argv[4]),
                     cSize=int(sys.argv[6]),
                     num_epochs=int(sys.argv[7]),
                     loss_function=sys.argv[8],
                     optimizer=sys.argv[9])

print("preparing data file")
model.prepareData(sys.argv[1], int(sys.argv[2]))
model.buildModel()

print("loading training data")
model.training_vectors = m.unpickleData("training_instances/" + sys.argv[10])
print("length of training", len(model.training_vectors))

print("loading testing data")
model.testing_vectors = m.unpickleData("training_instances/" + sys.argv[11])
print("length of testing", len(model.testing_vectors))

print("training")
model.train(sys.argv[1])

print("testing")
if "embed" not in sys.argv[3]:
    model.test_eos_w2v()
else:
    model.test_eos_embed()