def training_step(self, batch, batch_idx):
    """Run one training step and report its loss and Dice value.

    ``batch`` is unpacked into an image and a mask; the mask gets an extra
    dimension at position 1 before being compared with the forward output.
    ``batch_idx`` is accepted but not used.

    Returns a dict with the keys ``'loss'`` (from ``self.loss``) and
    ``'dice'`` (from the module-level ``metric`` applied to the sigmoid of
    the output, the mask and the constant 0.5).
    """
    inputs, target = batch
    target = target.unsqueeze(1)
    logits = self.forward(inputs)
    # NOTE(review): 0.5 is presumably the binarisation threshold used by
    # ``metric`` -- confirm against its definition.
    return {
        'loss': self.loss(logits, target),
        'dice': metric(torch.sigmoid(logits), target, 0.5),
    }
def graph(self, datax, datay, fname=None, series=[], runfilter=None):
    """Plot one metric against another, drawing one line per run set.

    Args:
        datax: metric specification for the x axis, passed to
            ``metrics.metric``.
        datay: metric specification for the y axis, passed to
            ``metrics.metric``.
        fname: when truthy, the figure is saved under
            ``config.fullrunsdir/<self.runid>/<fname>``; otherwise it is
            shown interactively.
        series: grouping specification forwarded to ``self.makerunsets``.
            The default list is never mutated in this method.
        runfilter: run filter forwarded to ``self.makerunsets``. The
            original body read ``runfilter`` without defining it; it is now
            an optional trailing parameter, matching the sibling
            ``process`` methods.
    """
    fx = metrics.metric(datax)
    fy = metrics.metric(datay)

    pyplot.clf()
    pyplot.xlabel(str(fx))
    pyplot.ylabel(str(fy))

    for key, runset in self.makerunsets(series, runfilter):
        points = [(fx(run), fy(run)) for run in runset]
        if not points:
            # zip(*[]) yields nothing to unpack into x and y; there is
            # nothing to draw for an empty run set anyway.
            continue
        x, y = zip(*points)
        pyplot.plot(x, y, '-o', label=str(key))
    pyplot.legend(loc=0)

    if fname:
        pyplot.savefig(os.path.join(config.fullrunsdir, self.runid, fname))
    else:
        pyplot.show()
def process(self, datas, series=[], runfilter=None, datafilter=None):
    """Print the requested data for every run as comma-separated text.

    The first line lists ``datas``. Then, for each ``(key, runset)`` pair
    returned by ``self.makerunsets``, one line pairs each entry of
    ``series`` with the matching entry of ``key``, followed by one line per
    run holding the value of every data metric right-justified to width 6.
    """
    extractors = [metrics.metric(item) for item in datas]
    grouped = self.makerunsets(series, runfilter, datafilter)

    header = ', '.join(map(str, datas))
    print(header)
    for key, runset in grouped:
        labels = ['%s: %s' % pair for pair in zip(series, key)]
        print(', '.join(labels))
        for run in runset:
            cells = [str(extract(run)).rjust(6) for extract in extractors]
            print(', '.join(cells))
def get_account_eth_balance(address=None) -> float:
    """Return the ETH balance of ``address`` and publish it as a metric.

    Args:
        address: account to query. When omitted (``None``) the funder public
            key is read via ``get_secret`` using the secret name stored in
            the ``PAC_FUNDER_PUBKEY_SECRET`` environment variable. The
            lookup happens at call time; it used to be a default-argument
            expression and therefore ran once, when the function was
            defined.

    Returns:
        The value of ``get_eth_balance(address)`` divided by ``10**18``.

    Raises:
        KeyError: if ``address`` is omitted and ``PAC_FUNDER_PUBKEY_SECRET``
            is not set in the environment.
    """
    if address is None:
        address = get_secret(key=os.environ['PAC_FUNDER_PUBKEY_SECRET'])

    token_name = 'Ethereum'
    token_symbol = 'ETH'
    token_decimals = 18

    # The raw balance is scaled down by 10**token_decimals.
    balance = get_eth_balance(address) / 10**token_decimals
    logging.info(
        f"Balance of {address}: {balance} {token_name} ({token_symbol})")
    metric(
        metric_name=f"orchid.pac.balance.{token_symbol.lower()}",
        value=balance,
        tags=[
            f'account:{address}',
            f'token_name:{token_name}',
            f'token_symbol:{token_symbol}',
            f'token_decimals:{token_decimals}',
        ],
    )
    return balance
def process_simple(self, ids=[], datas=[], series=[], runfilter=None,
                   datafilter=None, aggr=aggregate.concat):
    """Print a LaTeX report with one table section per series.

    The output consists of an itemized list of ``datas``, an itemized
    legend ``S1, S2, ...`` describing each series key, and a ``tabular``
    with ``len(ids) + len(datas)`` columns. For every series index the
    table gets a header row, a row naming the data columns, and one row per
    id run set holding the id key followed by the aggregated value of each
    data metric for that series' run group.

    Args:
        ids: metrics used to group runs into table rows.
        datas: metrics whose aggregated values fill the data columns.
        series: metrics used to split each row's runs into series.
        runfilter, datafilter: forwarded to ``self.makerunsets``.
        aggr: callable applied to the list of per-run values of one metric.
    """
    fs = [metrics.metric(data) for data in datas]

    def runkey(run):
        # Key a run by its value for every series metric.
        return [metrics.evalmetric(m, run) for m in series]

    runsets = self.makerunsets(ids, runfilter, datafilter)
    runseries = self.makerunsets(series, runfilter, datafilter)

    print('Data:')
    print('\\begin{itemize}')
    for d in datas:
        print('\\item %s' % d)
    print('\\end{itemize}')

    print('Series:')
    print('\\begin{itemize}')
    count = 0
    for i, (key, runset) in enumerate(runseries):
        described = ', '.join(
            ['%s: %s' % (s, k) for s, k in zip(series, key)])
        print('\\item S%s: %s' % (i + 1, described))
        count += 1
    print('\\end{itemize}')

    print('\\begin{tabular}{|%s|}' % ('c' * len(ids) + '|' + 'c' * len(datas)))

    grouped_runsets = []
    for key, runset in runsets:
        grouped_runsets.append(list(groupby_sorted(runset, key=runkey)))

    for serieindex in range(count):
        print('\\hline')
        # Fix: the header used to contain an extra ' & ', producing one
        # more cell than the tabular declares, and it labelled the series
        # from S0 while the legend above counts from S1.
        print('\\multicolumn{%s}{|c|}{Id} & ' % len(ids)
              + '\\multicolumn{%s}{|c|}{S%s}' % (len(datas), serieindex + 1))
        print('\\\\')
        print(' & ' + ' & '.join([str(data) for data in datas]))
        print('\\\\')
        print('\\hline')
        # NOTE(review): this second pass over ``runsets`` assumes
        # makerunsets returns a re-iterable sequence -- confirm.
        for grouped_runset, (key, runset) in zip(grouped_runsets, runsets):
            gk, rungroup = grouped_runset[serieindex]
            cells = [str(k).replace('\\', '') for k in key]
            cells += [str(aggr([f(run) for run in rungroup])).rjust(4)
                      for f in fs]
            print(' & '.join(cells) + '\n\\\\')
        print('\\hline')
    print('\\hline \n \\end{tabular}')
def runCascade(self, C):
    """Step through cascade ``C`` and record the metric after each step.

    A metric object is built from ``C.getGraph()``. After every
    ``C.next()`` the newly infected node is added to it, and a snapshot
    (``asMap()``) plus the current step are stored. Iteration ends when
    ``StopIteration`` is raised.

    Returns:
        ``(steps, snapshots)``: two parallel lists.
    """
    steps, snapshots = [], []
    tracker = metrics.metric(C.getGraph())
    try:
        while True:
            C.next()
            tracker.add(C.getInfectedNode())
            snapshots.append(tracker.asMap())
            steps.append(C.getStep())
    except StopIteration:
        # Raised once the cascade has no further steps.
        pass
    return steps, snapshots
def process(self, ids=[], datas=[], series=[], runfilter=None,
            datafilter=None, aggr=aggregate.concat):
    """Print a LaTeX report with all series side by side in one table.

    The output consists of an itemized list of ``datas``, an itemized
    legend ``S1, S2, ...`` describing each series key, and a ``tabular``
    with the id columns followed by one group of data columns per series.
    Each table row holds an id key and, for every run group of that id's
    run set, the aggregated value of each data metric.
    """
    extractors = [metrics.metric(item) for item in datas]

    def series_key(run):
        return [metrics.evalmetric(m, run) for m in series]

    id_groups = self.makerunsets(ids, runfilter, datafilter)
    series_groups = self.makerunsets(series, runfilter, datafilter)

    print('Data:')
    print('\\begin{itemize}')
    for item in datas:
        print('\\item %s' % item)
    print('\\end{itemize}')

    print('Series:')
    print('\\begin{itemize}')
    count = 0
    for position, (key, runset) in enumerate(series_groups):
        described = ', '.join(
            ['%s: %s' % (s, k) for s, k in zip(series, key)])
        print('\\item S%s: %s' % (position + 1, described))
        count += 1
    print('\\end{itemize}')

    column_spec = 'c' * len(ids) + ('|' + 'c' * len(datas)) * count
    print('\\begin{tabular}{|%s|}' % column_spec)
    print('\\hline')
    series_headers = [
        '\\multicolumn{%s}{|c|}{S%s}' % (len(datas), n + 1)
        for n in range(count)
    ]
    print('\\multicolumn{%s}{|c|}{Id} & ' % len(ids)
          + ' & '.join(series_headers))
    print('\\\\')
    print(' & ' + ' & '.join([str(item) for item in datas] * count))
    print('\\\\')
    print('\\hline')

    # Rungroup: the 3 runs for a given density
    for key, runset in id_groups:
        id_cells = [str(part).replace('\\', '') for part in key]
        data_cells = flatten([
            [str(aggr([f(run) for run in rungroup])).rjust(4)
             for f in extractors]
            for k, rungroup in groupby_sorted(runset, key=series_key)
        ])
        print(' & '.join(id_cells + data_cells) + '\n\\\\')
    print('\\hline \n \\end{tabular}')
def runCascade(self, C):
    """Step through cascade ``C`` and record the metric after each step.

    The metric object is built from ``C.getGraph()`` with the time format
    taken from ``self.options.datetimeformat``. After every ``C.next()``
    the infected node, its step time and its tag are added to it, and a
    snapshot (``asMap()``) plus the current step are stored. Iteration ends
    when ``StopIteration`` is raised.

    Returns:
        ``(steps, snapshots)``: two parallel lists.
    """
    steps, snapshots = [], []
    tracker = metrics.metric(C.getGraph(),
                             time_format=self.options.datetimeformat)
    try:
        while True:
            C.next()
            tracker.add(C.getInfectedNode(), C.getStepTime(), C.getTag())
            snapshots.append(tracker.asMap())
            steps.append(C.getStep())
    except StopIteration:
        # Raised once the cascade has no further steps.
        pass
    return steps, snapshots
def paralled_train(dropout_keep_prob=0.5,
                   edge_size=48,
                   learning_rate=1e-6,
                   n_epochs=20,
                   batch_size=30,
                   n_classes=2,
                   show_batch_result=False,
                   num_gpus=4):
    """Train ``c3d_net`` data-parallel on ``num_gpus`` GPU towers.

    One tower (placeholders, network, squared-error cost, gradients) is
    built per GPU. Tower costs are averaged, tower gradients are combined
    with ``average_gradients`` and applied by a single Adam optimizer. After
    every epoch the model is evaluated on the validation set; a checkpoint
    is written whenever the validation F1 improves and whenever the
    validation cost improves.

    Args:
        dropout_keep_prob: keep probability fed to the towers while
            training (1.0 is fed during validation).
        edge_size: edge length of the cubic input volume.
        learning_rate: Adam learning rate.
        n_epochs: number of passes over the training set.
        batch_size: batch size per GPU; the global batch is
            ``batch_size * num_gpus``.
        n_classes: width of the one-hot label vector.
        show_batch_result: print accuracy, cost and F1 for every training
            batch.
        num_gpus: number of GPU towers.

    Returns:
        ``(train_accs, train_costs, train_f1s, val_accs, val_costs,
        val_f1s, best_train_f1, best_val_f1)``; the first six are
        per-epoch lists.
    """
    print(locals())
    batch_size *= num_gpus

    from data_reader import load_train_or_val_dataset_full
    from metrics import metric, metric_
    from glob import glob
    from tqdm import tqdm
    import os

    path = os.path.join(os.path.dirname(os.getcwd()), 'output')
    train_data_files = glob(path + '/train_*.h5')
    val_data_files = glob(path + '/val_*.h5')

    print("...... building model ......")
    # NOTE(review): the nesting of the device/session scopes below was
    # reconstructed from the statement order -- confirm against the
    # original layout.
    with tf.Session() as sess:
        with tf.device("/cpu:0"):
            opt = tf.train.AdamOptimizer(learning_rate=learning_rate)

            print("[x] building model on gpu towers ......")
            models = []
            for gpu_id in range(num_gpus):
                with tf.device("/gpu:%d" % gpu_id):
                    print("[x] tower:%d ......" % gpu_id)
                    with tf.name_scope("tower_%d" % gpu_id):
                        # Towers after the first reuse the variables
                        # created by tower 0.
                        with tf.variable_scope("cpu_variables",
                                               reuse=gpu_id > 0):
                            x = tf.placeholder(
                                tf.float32,
                                [None, edge_size, edge_size, edge_size])
                            y = tf.placeholder(tf.float32, [None, n_classes])
                            keep_prob = tf.placeholder(tf.float32,
                                                       name="keep_prob")
                            y_conv = c3d_net(x, dropout_keep_prob=keep_prob)
                            y_prob = tf.nn.softmax(y_conv)
                            cost = tf.reduce_mean(tf.square(y - y_prob))
                            grads = opt.compute_gradients(cost)
                            models.append(
                                (x, y, keep_prob, y_prob, cost, grads))
            print("[x] building model on gpu tower done ......")

            # Running bests and per-epoch histories.
            best_train_f1 = 0.
            best_train_epoch = 0
            best_val_f1 = 0.
            best_val_cost = 1000.
            best_val_epoch = 0
            train_accs = []
            val_accs = []
            train_costs = []
            val_costs = []
            train_f1s = []
            val_f1s = []

            print("[x] reduce model on cpu ......")
            print("[!] have been created %d models ......" % (len(models)))
            (tower_x, tower_y, tower_keep_prob, tower_probs, tower_costs,
             tower_grads) = zip(*models)
            aver_loss_op = tf.reduce_mean(tower_costs)
            apply_gradient_op = opt.apply_gradients(
                average_gradients(tower_grads))
            all_y = tf.reshape(tf.stack(tower_y, 0), [-1, n_classes])
            all_pred = tf.reshape(tf.stack(tower_probs, 0), [-1, n_classes])
            correct_pred = tf.equal(tf.argmax(all_y, 1),
                                    tf.argmax(all_pred, 1))
            accuracy = tf.reduce_mean(tf.cast(correct_pred, "float"))
            print("[x] reduce model on cpu done ...")

            print("...... loading dataset ......")
            train_set_x, train_set_y = load_train_or_val_dataset_full(
                train_data_files)
            n_train_examples = train_set_x.shape[0]
            val_set_x, val_set_y = load_train_or_val_dataset_full(
                val_data_files)
            n_validation_examples = val_set_x.shape[0]

            print("[x] run init op ...")
            sess.run(tf.global_variables_initializer())
            saver = tf.train.Saver()

            print("[x] run train op ...")
            # batch_size is an exact multiple of num_gpus (see above), so
            # floor division gives the same value on Python 2 and stays an
            # int on Python 3.
            payload_per_gpu = batch_size // num_gpus
            n_train_batches = n_train_examples // batch_size
            n_validation_batches = n_validation_examples // batch_size

            for epoch_i in tqdm(range(n_epochs)):
                train_cost = 0.
                train_acc = 0.
                y_probs_train = []
                y_true_train = []
                y_probs_val = []
                y_true_val = []

                for mini_batch in range(n_train_batches):
                    start = mini_batch * batch_size
                    stop = start + batch_size
                    batch_xs = train_set_x[start:stop]
                    batch_ys = train_set_y[start:stop]
                    input_dict = feed_all_gpu({}, models, payload_per_gpu,
                                              batch_xs, batch_ys,
                                              dropout_keep_prob)
                    _, t_loss, t_acc, y_p = sess.run(
                        [apply_gradient_op, aver_loss_op, accuracy, all_pred],
                        feed_dict=input_dict)
                    y_probs_train.append(y_p)
                    y_true_train.append(batch_ys)
                    train_cost += t_loss
                    train_acc += t_acc
                    if show_batch_result:
                        print('epoch %d,, minibatch %d, train acc = %f %%, train_cost = %f ,train_f1 = %f' % (
                            epoch_i, mini_batch, t_acc * 100, t_loss,
                            metric_(y_p, batch_ys)))

                # Fix: the divisor used to be derived from a counter that
                # was initialised to 0 and never updated, so the averages
                # were divided by zero. Average over the batches run.
                train_acc /= max(n_train_batches, 1)
                train_cost /= max(n_train_batches, 1)
                train_f1 = metric(y_probs_train, y_true_train)

                # Validation pass (dropout disabled, no gradient step).
                validation_cost = 0.
                validation_acc = 0.
                for mini_batch in range(n_validation_batches):
                    start = mini_batch * batch_size
                    stop = start + batch_size
                    batch_xs = val_set_x[start:stop]
                    batch_ys = val_set_y[start:stop]
                    input_dict = feed_all_gpu({}, models, payload_per_gpu,
                                              batch_xs, batch_ys, 1.0)
                    v_loss, v_acc, v_prob = sess.run(
                        [aver_loss_op, accuracy, all_pred],
                        feed_dict=input_dict)
                    y_probs_val.append(v_prob)
                    y_true_val.append(batch_ys)
                    validation_cost += v_loss
                    validation_acc += v_acc
                validation_acc /= max(n_validation_batches, 1)
                validation_cost /= max(n_validation_batches, 1)
                validation_f1 = metric(y_probs_val, y_true_val)

                if train_f1 > best_train_f1:
                    best_train_f1 = train_f1
                    best_train_epoch = epoch_i
                if validation_f1 > best_val_f1:
                    best_val_f1 = validation_f1
                    best_val_epoch = epoch_i
                    saver.save(
                        sess, 'trained_model/3dcnn_m1_f1_lr_%s_dp_%s.ckpt' %
                        (learning_rate, dropout_keep_prob))
                if validation_cost < best_val_cost:
                    best_val_cost = validation_cost
                    saver.save(
                        sess, 'trained_model/3dcnn_m1_cost_lr_%s_dp_%s.ckpt' %
                        (learning_rate, dropout_keep_prob))

                train_accs.append(train_acc)
                train_costs.append(train_cost)
                train_f1s.append(train_f1)
                val_accs.append(validation_acc)
                val_costs.append(validation_cost)
                val_f1s.append(validation_f1)
                print('### epoch_%d,training data f1 = %f, cost = %f, acc = %f %%' % (
                    epoch_i, train_f1, train_cost, train_acc * 100))
                print('### epoch_%d,validation data f1 = %f, cost = %f,acc = %f %%' % (
                    epoch_i, validation_f1, validation_cost,
                    validation_acc * 100))

            print('done')
            print('best f1 on training data = %f @ epoch %d' % (
                best_train_f1, best_train_epoch))
            print('best f1 on validation data = %f @ epoch %d' % (
                best_val_f1, best_val_epoch))

            plt.plot_cost(
                train_accs, n_epochs,
                'train_accs_m1_lr_%s_dp_%s' % (learning_rate, dropout_keep_prob))
            plt.plot_cost(
                train_costs, n_epochs,
                'train_cost_m1_lr_%s_dp_%s' % (learning_rate, dropout_keep_prob))
            plt.plot_cost(
                train_f1s, n_epochs,
                'train_f1_m1_lr_%s_dp_%s' % (learning_rate, dropout_keep_prob))
            plt.plot_cost(
                val_accs, n_epochs,
                'val_accs_m1_lr_%s_dp_%s' % (learning_rate, dropout_keep_prob))
            plt.plot_cost(
                val_costs, n_epochs,
                'val_costs_m1_lr_%s_dp_%s' % (learning_rate, dropout_keep_prob))
            plt.plot_cost(
                val_f1s, n_epochs,
                'val_f1s_m1_lr_%s_dp_%s' % (learning_rate, dropout_keep_prob))
            return (train_accs, train_costs, train_f1s, val_accs, val_costs,
                    val_f1s, best_train_f1, best_val_f1)
# print(len(test_data_generator.filenames)) # print(test_data_generator.filenames) prediction = model.predict_generator(test_data_generator, steps=len( test_data_generator.filenames)) f1score = f1_score(prediction, val_labels) print("F1 Score is", f1score) test_y = [validation_labels] pred_y = [prediction] roc_val = roc_auc_score(test_y, pred_y) print('Logistic: ROC AUC=%.3f' % (roc_val)) lr_fpr, lr_tpr, _ = roc_curve(test_y, pred_y) pyplot.plot(lr_fpr, lr_tpr, marker='.', label='Logistic') pyplot.xlabel('False Positive Rate') pyplot.ylabel('True Positive Rate') filenames = test_data_generator.filenames labels = test_data_generator.class_indices labels = dict((v, k) for k, v in labels.items()) # print(labels) predictions = [labels[k] for k in prediction] results = pd.DataFrame({ "Filename": filenames, "Predictions": predictions, "Label": val_labels }) results.to_csv("ut_ci_results30.csv", index=False) metric("ut_ci_results30.csv")
def train_c3d(dropout_keep_prob=0.8,
              edge_size=36,
              learning_rate=0.0001,
              n_epochs=10,
              batch_size=100,
              n_classes=2,
              show_batch_result=True):
    """Train ``residual_inception_c3d_net`` on the h5 files under ``output``.

    Training and validation files are matched by ``train_*.h5`` and
    ``val_*.h5`` in the ``output`` directory next to the current working
    directory, and are loaded one file at a time. Each epoch trains on every
    batch of every training file (including a final partial batch), then
    evaluates the full batches of every validation file. A checkpoint is
    written whenever the validation F1 improves and whenever the validation
    cost improves.

    :param dropout_keep_prob: keep probability fed while training (1.0 is
        fed during validation)
    :param edge_size: edge length of the cubic input volume
    :param learning_rate: Adam learning rate
    :param n_epochs: number of passes over the training files
    :param batch_size: number of examples per batch
    :param n_classes: width of the one-hot label vector
    :param show_batch_result: print accuracy, cost and F1 for every
        training batch
    :return: ``(train_accs, train_costs, train_f1s, val_accs, val_costs,
        val_f1s, best_train_f1, best_val_f1)``; the first six are per-epoch
        lists
    """
    print('...... loading dataset ......')
    from data_reader import load_train_or_val_dataset_single
    from metrics import metric, metric_
    from glob import glob
    from tqdm import tqdm
    import os

    path = os.path.join(os.path.dirname(os.getcwd()), 'output')
    train_data_files = glob(path + '/train_*.h5')
    val_data_files = glob(path + '/val_*.h5')

    print('...... building model ......')
    x = tf.placeholder(tf.float32, [None, edge_size, edge_size, edge_size])
    y = tf.placeholder(tf.float32, [None, n_classes])
    keep_prob = tf.placeholder(tf.float32)
    y_conv = residual_inception_c3d_net(x, dropout_prob=keep_prob)
    y_prob = tf.nn.softmax(y_conv)
    cost = tf.reduce_mean(tf.square(y - y_prob))
    optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)
    correct_prediction = tf.equal(tf.argmax(y_prob, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    best_train_f1 = 0.
    best_train_epoch = 0
    best_val_f1 = 0.
    best_val_cost = 1000.
    best_val_epoch = 0
    train_accs = []
    val_accs = []
    train_costs = []
    val_costs = []
    train_f1s = []
    val_f1s = []

    print('...... initializing ......')
    init = tf.initialize_all_variables()
    saver = tf.train.Saver()
    with tf.Session() as sess:
        print('...... initializing variables .....')
        sess.run(init)
        print('...... start to train sliding_window ......')
        for epoch_i in tqdm(range(n_epochs)):
            train_cost = 0.
            train_acc = 0.
            n_train_batches = 0
            y_probs_train = []
            y_true_train = []
            y_probs_val = []
            y_true_val = []

            # train files
            for i, data_file in enumerate(train_data_files):
                print('training epoch %d, datafile %s : %d' % (
                    epoch_i, data_file, i))
                train_set_x, train_set_y = load_train_or_val_dataset_single(
                    data_file)
                n_train_examples = train_set_x.shape[0]
                n_full_batches = n_train_examples // batch_size

                for mini_batch in range(n_full_batches):
                    start = mini_batch * batch_size
                    stop = start + batch_size
                    batch_xs = train_set_x[start:stop]
                    batch_ys = train_set_y[start:stop]
                    _, t_loss, t_acc, y_p = sess.run(
                        [optimizer, cost, accuracy, y_prob],
                        feed_dict={
                            x: batch_xs,
                            y: batch_ys,
                            keep_prob: dropout_keep_prob
                        })
                    y_probs_train.append(y_p)
                    y_true_train.append(batch_ys)
                    train_cost += t_loss
                    train_acc += t_acc
                    n_train_batches += 1
                    if show_batch_result:
                        print('epoch %d, data_file %s - %d, minibatch %d, train acc = %f %%, train_cost = %f ,train_f1 = %f' % (
                            epoch_i, data_file, i, mini_batch, t_acc * 100,
                            t_loss, metric_(y_p, batch_ys)))

                # Train on the examples left over after the full batches.
                if n_train_examples % batch_size != 0:
                    tail_start = n_full_batches * batch_size
                    batch_xs = train_set_x[tail_start:]
                    batch_ys = train_set_y[tail_start:]
                    _, t_loss, t_acc, y_p = sess.run(
                        [optimizer, cost, accuracy, y_prob],
                        feed_dict={
                            x: batch_xs,
                            y: batch_ys,
                            keep_prob: dropout_keep_prob
                        })
                    y_probs_train.append(y_p)
                    y_true_train.append(batch_ys)
                    train_cost += t_loss
                    train_acc += t_acc
                    n_train_batches += 1
                    if show_batch_result:
                        print('epoch %d, data_file %s - %d, minibatch %d, train acc = %f %%, train_cost = %f, train_f1 = %f' % (
                            epoch_i, data_file, i, n_full_batches + 1,
                            t_acc * 100, t_loss, metric_(y_p, batch_ys)))

            # Fix: the sums were divided by total_examples // batch_size,
            # which is smaller than the number of batches actually run
            # whenever a file has a partial batch (inflating the averages)
            # and is zero for small data sets. Divide by the batch count.
            train_acc /= max(n_train_batches, 1)
            train_cost /= max(n_train_batches, 1)
            train_f1 = metric(y_probs_train, y_true_train)

            # validation files
            validation_cost = 0.
            validation_acc = 0.
            n_val_batches = 0
            for data_file in val_data_files:
                val_set_x, val_set_y = load_train_or_val_dataset_single(
                    data_file)
                n_val_examples = val_set_x.shape[0]
                # Only full batches are evaluated; leftover validation
                # examples of each file are not used.
                for mini_batch in range(n_val_examples // batch_size):
                    start = batch_size * mini_batch
                    stop = start + batch_size
                    batch_xs = val_set_x[start:stop]
                    batch_ys = val_set_y[start:stop]
                    v_loss, v_acc, v_prob = sess.run(
                        [cost, accuracy, y_prob],
                        feed_dict={
                            x: batch_xs,
                            y: batch_ys,
                            keep_prob: 1.0
                        })
                    y_probs_val.append(v_prob)
                    y_true_val.append(batch_ys)
                    validation_cost += v_loss
                    validation_acc += v_acc
                    n_val_batches += 1
            # Same fix as for training: average over the batches run.
            validation_acc /= max(n_val_batches, 1)
            validation_cost /= max(n_val_batches, 1)
            validation_f1 = metric(y_probs_val, y_true_val)

            if train_f1 > best_train_f1:
                best_train_f1 = train_f1
                best_train_epoch = epoch_i
            if validation_f1 > best_val_f1:
                best_val_f1 = validation_f1
                best_val_epoch = epoch_i
                saver.save(
                    sess, 'trained_model/3dcnn_m2_f1_lr_%s_dp_%s.ckpt' %
                    (learning_rate, dropout_keep_prob))
            if validation_cost < best_val_cost:
                best_val_cost = validation_cost
                saver.save(
                    sess, 'trained_model/3dcnn_m2_cost_lr_%s_dp_%s.ckpt' %
                    (learning_rate, dropout_keep_prob))

            train_accs.append(train_acc)
            train_costs.append(train_cost)
            train_f1s.append(train_f1)
            val_accs.append(validation_acc)
            val_costs.append(validation_cost)
            val_f1s.append(validation_f1)
            print('### epoch_%d,training data f1 = %f, cost = %f, acc = %f %%' % (
                epoch_i, train_f1, train_cost, train_acc * 100))
            print('### epoch_%d,validation data f1 = %f, cost = %f,acc = %f %%' % (
                epoch_i, validation_f1, validation_cost, validation_acc * 100))

        print('done')
        print('best f1 on training data = %f @ epoch %d' % (best_train_f1,
                                                             best_train_epoch))
        print('best f1 on validation data = %f @ epoch %d' % (best_val_f1,
                                                               best_val_epoch))

        plt.plot_cost(
            train_accs, n_epochs,
            'train_accs_m2_lr_%s_dp_%s' % (learning_rate, dropout_keep_prob))
        plt.plot_cost(
            train_costs, n_epochs,
            'train_cost_m2_lr_%s_dp_%s' % (learning_rate, dropout_keep_prob))
        plt.plot_cost(
            train_f1s, n_epochs,
            'train_f1_m2_lr_%s_dp_%s' % (learning_rate, dropout_keep_prob))
        plt.plot_cost(
            val_accs, n_epochs,
            'val_accs_m2_lr_%s_dp_%s' % (learning_rate, dropout_keep_prob))
        plt.plot_cost(
            val_costs, n_epochs,
            'val_costs_m2_lr_%s_dp_%s' % (learning_rate, dropout_keep_prob))
        plt.plot_cost(
            val_f1s, n_epochs,
            'val_f1s_m2_lr_%s_dp_%s' % (learning_rate, dropout_keep_prob))
        return (train_accs, train_costs, train_f1s, val_accs, val_costs,
                val_f1s, best_train_f1, best_val_f1)
def _scan_all_items(table):
    """Return every item of ``table``, following scan pagination."""
    response = table.scan()
    items = list(response['Items'])
    # A single scan call returns one page; DynamoDB signals that more pages
    # exist by including LastEvaluatedKey in the response.
    while 'LastEvaluatedKey' in response:
        response = table.scan(ExclusiveStartKey=response['LastEvaluatedKey'])
        items.extend(response['Items'])
    return items


def update_statuses():
    """Refresh the stored status of every PAC and publish pool metrics.

    For each item of the DynamoDB table named by the ``TABLE_NAME``
    environment variable, the current transaction status is looked up and
    counted per price. Items whose stored status is not ``'confirmed'`` are
    then handled as follows:

    * still unconfirmed after 10 hours: the item is deleted and the account
      recycled;
    * status changed: the stored status is updated (conditionally on the old
      status still being stored). If the new status is ``'confirmed'``, the
      item is deleted and recycled when its escrow does not exceed the
      minimum escrow, otherwise an ``orchid.pac`` metric is published;
    * status unchanged: only a debug message is logged.

    Finally one ``orchid.pac.pool.<status>`` metric is published for every
    status/price combination.

    Raises:
        KeyError: if ``TABLE_NAME`` or ``PAC_FUNDER_PUBKEY_SECRET`` is not
            set in the environment.
    """
    logging.debug('update_statuses()')
    dynamodb = boto3.resource('dynamodb')
    table = dynamodb.Table(os.environ['TABLE_NAME'])
    # Fix: a single table.scan() only returned the first page of results,
    # so items beyond it were never updated or counted.
    items = _scan_all_items(table)

    # Start every known status with a zero count for every known price.
    counts = {}
    mapping = get_product_id_mapping()
    for status in ('confirmed', 'unconfirmed', 'unknown', 'pending'):
        counts[status] = {}
        for tier in mapping:
            price = mapping[tier]
            counts[status][price] = 0

    funder_pubkey = get_secret(key=os.environ['PAC_FUNDER_PUBKEY_SECRET'])
    max_unconfirmed_age = 10 * 60 * 60  # 10 hours in seconds

    for item in items:
        signer = item['signer']
        price = item['price']
        push_txn_hash = item['push_txn_hash']
        status = item['status']
        balance = float(item.get('balance', 0))
        escrow = float(item.get('escrow', 0))
        blocknum = get_block_number()
        new_status = get_transaction_status(push_txn_hash, blocknum)

        # Statuses or prices not pre-seeded above are added on first sight.
        status_counts = counts.setdefault(new_status, {})
        status_counts[price] = status_counts.get(price, 0) + 1

        if status == 'confirmed':
            # Already confirmed
            logging.debug(f'{push_txn_hash} with signer:{signer} and price:{price} already has status:{status}')
            continue

        creation_etime = item.get('creation_etime', 0)
        epoch_time = int(time.time())
        age = epoch_time - creation_etime
        if new_status != 'confirmed' and age >= max_unconfirmed_age:
            logging.warning(
                f'PAC with funder: {funder_pubkey} signer: {signer} balance: {balance} and '
                f'escrow: {escrow} has status: {new_status} and age: {age} >= 10 hours. Deleting.'
            )
            table.delete_item(
                Key={
                    'price': price,
                    'signer': signer,
                }
            )
            recycle_account(funder=funder_pubkey, signer=signer)
            continue

        if status == new_status:
            # non-confirmed status -> same status, not confirmed
            logging.debug(f'No need to update {push_txn_hash} with signer:{signer} and price:{price} from {status}')
            continue

        # non-confirmed status -> different status, possibly confirmed
        logging.debug(
            f'Changing {push_txn_hash} with signer:{signer} and price:{price} '
            f'from {status} to {new_status}'
        )
        # The condition makes the write fail if the stored status is no
        # longer the one that was read above.
        table.update_item(
            Key={
                'price': price,
                'signer': signer,
            },
            UpdateExpression="SET #status = :new_status",
            ExpressionAttributeValues={
                ':new_status': new_status,
                ':old_status': status,
            },
            ExpressionAttributeNames={
                "#status": "status"
            },
            ConditionExpression="#status = :old_status",
        )
        if new_status != 'confirmed':
            continue

        # non-confirmed status -> confirmed
        token_name = get_token_name()
        token_symbol = get_token_symbol()
        token_decimals = get_token_decimals()
        total = balance + escrow
        min_escrow = get_min_escrow()
        if escrow <= min_escrow:
            # non-confirmed status -> confirmed, bad escrow
            logging.warning(
                f'PAC with funder: {funder_pubkey} signer: {signer} balance: {balance} and '
                f'escrow: {escrow} has escrow <= min escrow of {min_escrow}. Deleting.'
            )
            table.delete_item(
                Key={
                    'price': price,
                    'signer': signer,
                }
            )
            recycle_account(funder=funder_pubkey, signer=signer)
        else:
            # non-confirmed status -> confirmed, good escrow
            metric(
                metric_name='orchid.pac',
                value=total,
                tags=[
                    f'funder:{funder_pubkey}',
                    f'signer:{signer}',
                    f'price:{price}',
                    f'balance:{balance}',
                    f'escrow:{escrow}',
                    f'lottery_contract:{os.environ["LOTTERY"]}',
                    f'token_name:{token_name}',
                    f'token_symbol:{token_symbol}',
                    f'token_decimals:{token_decimals}',
                ],
            )

    for status in counts:
        for price in counts[status]:
            value = counts[status][price]
            logging.debug(f'There are {value} ${price} PACs with a status of {status}')
            metric(
                metric_name=f'orchid.pac.pool.{status}',
                value=value,
                tags=[
                    f'funder:{funder_pubkey}',
                    f'price:{price}',
                ],
            )
testData = data.loc[testIndex] # Reset index in training data set trainData.reset_index(inplace = True) training_counts = trainData['v1'].value_counts().tolist() print("\nTraining data set: 80% of data set\nNumber of spam: ", training_counts[0],"\nNumber of ham: ", training_counts[1]) # Reset index in testing data set testData.reset_index(inplace = True) testing_counts = testData['v1'].value_counts().tolist() print("\nTesting data set: 20% of data set\nNumber of spam: ", testing_counts[0],"\nNumber of ham: ", testing_counts[1]) # Training the TF-IDF model tfidf = TFIDF_model(trainData) tfidf.TF_and_IDF() tfidf.TFIDF() metric(testData['v1'], tfidf.test(testData['v2'])) # Running examples message1 = 'OMW. I will call you later.' process1 = process(message1) print("\nMessage 1: ", message1, "\nSpam = 1, Ham = 0: ", tfidf.classify(process1)) message2 = 'I will text you when I finish work' process2 = process(message2) print("\nMessage 2: ", message2, "\nSpam = 1, Ham = 0: ", tfidf.classify(process2)) message3 = 'You win a trip to Europe! Call now to redeem' process3 = process(message3) print("\nMessage 3: ", message3, "\nSpam = 1, Ham = 0: ", tfidf.classify(process3)) message4 = 'Text or call now for a week of FREE membership.'
# NOTE(review): this loop looks like the tail of a function whose header is
# not visible here (Z, cin and cout are not defined in the visible code) --
# confirm its enclosing definition and indentation.
# For every prediction in Z, read the next row from cin and write that row's
# first field together with the prediction to cout.
for a in Z:
    x = cin.next()
    cid = x[0]
    # Use the first element when the prediction is indexable; fall back to
    # the prediction itself when indexing raises IndexError.
    try:
        a0 = a[0]
    except IndexError:
        a0 = a
    cout.writerow([cid, a0])

# Command line: <method> <full_file> <full_cidfile> <outbase> <colname>
if __name__ == '__main__':
    method = sys.argv[1]
    mname = NAMES[method]
    full_file = sys.argv[2]
    # Drop the last four characters (presumably the '.csv' extension) to get
    # the prefix of the '_train.csv' / '_test.csv' files.
    basefile = full_file[:-4]
    full_cidfile = sys.argv[3]
    outbase = sys.argv[4]
    colname = sys.argv[5]
    X, Y, headers = get_XY(basefile + '_train.csv')
    # NOTE(review): eval() on a command-line argument executes arbitrary
    # code; since NAMES is already keyed by method, consider looking the
    # callable up in a table of allowed methods instead.
    pp = eval(method)(X, Y, basefile, headers)
    # Release the training data before the full file is loaded.
    del X, Y
    import metrics
    metrics.metric(pp, basefile + '_test.csv', '../data/%s' % mname)
    X, Y, headers = get_XY(full_file)
    Z = pp.predict(X)
    write_score(Z, outbase + '%s - Score.csv' % mname, full_cidfile, colname)