def _single_user_single_test(self, history_events, predict_events):
    """Score a single top-N prediction for one user.

    Falsy entries are dropped from both event lists and the history is cut
    down to its last ``self.lastN`` entries.  ``self.predict`` is called once
    and its result is scored with ``metrics``:

    * against the items of all ``revenue`` events (only if there are any),
      appended to ``self.pred_next_purchase_metric``;
    * against the items of all events to predict, appended to
      ``self.pred_whole_day_metric``.

    :param history_events: iterable of ``"behavior:item"`` strings
    :param predict_events: iterable of ``"behavior:item"`` strings
    """
    history = [event for event in history_events if event][-self.lastN:]
    upcoming = [event for event in predict_events if event]

    # Items that were actually purchased among the events to predict.
    purchased = set()
    for event in upcoming:
        behavior, item = event.split(':', 1)
        if behavior == 'revenue':
            purchased.add(item)

    # One top-N prediction is shared by both evaluations.
    pred = self.predict(history, self.topN)

    if purchased:
        purchase_scores = metrics(purchased, pred, ['HR', 'MRR', 'NDCG'])
        self.pred_next_purchase_metric.append(purchase_scores)

    whole_day = {event.split(':', 1)[1] for event in upcoming}
    day_scores = metrics(whole_day, pred, ['P&R', 'MAP'])
    # The first result is a list; the second one is appended to it as a scalar.
    self.pred_whole_day_metric.append(day_scores[0] + [day_scores[1]])
def metrics(self, op):
    """Run the evaluation for the requested split.

    Only ``op == "test"`` triggers work: the stored test targets and test
    predictions are handed to ``metrics.metrics``.  Any other value is a no-op
    (the train-split evaluation is disabled).

    :param op: <dtype: str> name of the split to evaluate
    """
    if op != "test":
        return
    metrics.metrics(self.y_te, self.y_pred_te)
def extract_features_percentage(classifier, percentage, X, Y, extraction_type):
    """Cross-validate ``classifier`` on features produced by a feature extractor.

    An extractor is built with ``extract_features`` for
    ``int(X.shape[1] * percentage / 100)`` components, fitted on the whole of
    ``X`` (``"pca"``) or on ``X`` and ``Y`` (``"lda"``), and used to transform
    ``X``.  The classifier is then evaluated with 10-fold cross-validation and
    the error, efficiency, elapsed time and per-metric mean / standard
    deviation are printed.

    :param classifier: estimator exposing ``fit`` (returning the model) and ``predict``
    :param percentage: percentage of the original feature count to keep
    :param X: 2-D feature array, indexable by arrays of row indices
    :param Y: target array, indexable by arrays of row indices
    :param extraction_type: ``"pca"`` or ``"lda"``; for any other value the
        extractor is not fitted by this function before ``transform`` is called
    :return: the extractor object
    """
    n_folds = 10
    tiempo_i = time.time()
    Errores = np.ones(n_folds)
    Metrics = np.zeros((n_folds, 5))
    kf = KFold(n_splits=n_folds)

    ex = extract_features(extraction_type, int(X.shape[1] * percentage / 100))
    if extraction_type == "pca":
        ex = ex.fit(X)
    elif extraction_type == "lda":
        ex = ex.fit(X, Y.astype(int))
    X_ex = ex.transform(X)

    for j, (train_index, test_index) in enumerate(kf.split(X_ex)):
        X_train, X_test = X_ex[train_index], X_ex[test_index]
        y_train, y_test = Y[train_index], Y[test_index]
        model = classifier.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        # Score the fold once and reuse the result for both tables.
        fold_metrics = metrics(y_test, y_pred)
        Errores[j] = 1 - fold_metrics[0]
        Metrics[j, :] = fold_metrics

    print("\nError de validación aplicando " + str(extraction_type) + " at " +
          str(percentage) + "%: " + str(np.mean(Errores)) + "+/-" +
          str(np.std(Errores)))
    print("\nEficiencia en validación aplicando " + str(extraction_type) +
          " at " + str(percentage) + "%: " +
          str((1 - np.mean(Errores)) * 100) + "%")
    print("\nTiempo total de ejecución: " + str(time.time() - tiempo_i) +
          " segundos.")

    MetricsMean = meanMetrics(Metrics)
    MetricsStd = stdMetrics(Metrics)
    printMetrics(MetricsMean)
    print("\nDesviaciones Estandard")
    printMetrics(MetricsStd)
    return ex
def csvWriter(recordings, csv_writer, params):
    """Print boundary-detection statistics pooled over the selected recordings.

    For every index in ``params[3]`` the ground-truth and detected boundaries
    of ``recordings[index]`` are collected through ``detectedBoundariesOutput``.
    The pooled counts come from ``boundariesStat`` (using
    ``varin['tolerance']``) and are turned into scores by ``metrics.metrics``.

    :param recordings: sequence of recordings, indexed by the entries of ``params[3]``
    :param csv_writer: accepted for interface compatibility; not used here
    :param params: sequence whose fourth element lists the recording indices
    """
    reference_all = []
    detected_all = []
    for index in params[3]:
        reference, detected, _, _ = detectedBoundariesOutput(
            recordings[index], varin)
        reference_all.append(reference)
        detected_all.append(detected)

    num_reference, num_detected, num_correct = boundariesStat(
        reference_all, detected_all, varin['tolerance'])

    HR, OS, FAR, F, R, deletion, insertion = metrics.metrics(
        num_detected, num_reference, num_correct)

    print('HR %.3f, OS %.3f, FAR %.3f, F %.3f, R %.3f, deletion %i, insertion %i'
          % (HR, OS, FAR, F, R, deletion, insertion))
    print('ground truth %i, detected %i, correct %i'
          % (num_reference, num_detected, num_correct))
def compare_scores(snippets):
    """Score every sufficiently long snippet and print per-query and average results.

    Each snippet is optionally preprocessed (when
    ``config_params["es_preprocess"]`` is truthy) and skipped if it has fewer
    than four whitespace-separated words.  For the remaining queries
    ``metrics.metrics(query)`` is called; precision, recall and F1 are derived
    from elements 0, 1 and 2 of its result, and elements 4 and 5 are
    accumulated as timings.

    :param snippets: iterable of query strings
    :return: dict mapping the (possibly preprocessed) query to
        ``[F1, precision, recall, scores[4], scores[5]]``
    """
    eps = 1e-9  # keeps the ratios defined when a denominator is zero
    scores_dict = dict()
    F1avg = 0
    valid_F1 = 0
    happ_avg_time = 0
    es_avg_time = 0

    for query in snippets:
        print("iteration number:", valid_F1)
        print(query)
        # preprocess the query
        if config_params["es_preprocess"]:
            query = " ".join(preprocess_sentence(query))
        if len(query.split()) < 4:
            continue

        scores = metrics.metrics(query)
        precision = scores[0] / (scores[0] + scores[1] + eps)
        recall = scores[0] / (scores[0] + scores[2] + eps)
        F1 = 2 * precision * recall / (precision + recall + eps)
        scores_dict[query] = [F1, precision, recall, scores[4], scores[5]]

        valid_F1 += 1
        happ_avg_time += scores[4]
        es_avg_time += scores[5]
        F1avg += F1
        print('scores:', scores)
        print('F1-score:', F1, 'precision:', precision, 'recall:', recall)
        print()

    # Averages are only defined when at least one query was scored; without
    # this guard an input with no usable query raised ZeroDivisionError.
    if valid_F1:
        print(F1avg / valid_F1, happ_avg_time / valid_F1,
              es_avg_time / valid_F1)
    else:
        print("no query with at least 4 words; averages not available")
    return scores_dict
def csvWriter(recordings,csv_writer,params):
    """Report pooled boundary-detection scores for the recordings in ``params[3]``.

    Ground-truth and detected boundaries are gathered per recording via
    ``detectedBoundariesOutput``, counted with ``boundariesStat`` at
    ``varin['tolerance']`` and converted to scores by ``metrics.metrics``.
    The scores and raw counts are printed.

    :param recordings: sequence of recordings, indexed by the entries of ``params[3]``
    :param csv_writer: not used by this function
    :param params: sequence whose element 3 is the iterable of recording indices
    """
    per_song = [detectedBoundariesOutput(recordings[ii], varin)
                for ii in params[3]]
    groundtruth_all = [song[0] for song in per_song]
    detected_all = [song[1] for song in per_song]

    counts = boundariesStat(groundtruth_all, detected_all, varin['tolerance'])
    total_groundtruth, total_detected, total_correct = counts

    scores = metrics.metrics(total_detected, total_groundtruth, total_correct)
    HR, OS, FAR, F, R, deletion, insertion = scores

    print('HR %.3f, OS %.3f, FAR %.3f, F %.3f, R %.3f, deletion %i, insertion %i'
          % (HR, OS, FAR, F, R, deletion, insertion))
    print('ground truth %i, detected %i, correct %i'
          % (total_groundtruth, total_detected, total_correct))
def calculate_fitness(self):
    """Compute this individual's fitness and cache it in ``self._fitness``.

    The fitness is the coefficient-weighted sum of the measurements returned
    by ``metrics.metrics(self.to_level())`` plus an adjustment that depends on
    how many design elements of each type the genome contains.

    :return: ``self``, so calls can be chained
    """
    measurements = metrics.metrics(self.to_level())
    # Default fitness function: an arbitrary combination of a few criteria.
    coefficients = dict(meaningfulJumpVariance=0.5,
                        negativeSpace=0.6,
                        pathPercentage=0.5,
                        emptyPercentage=0.8,
                        linearity=-0.7,
                        solvability=2.0)

    # Count the design elements per type tag (de[1]) in a single pass.
    counts = {}
    for de in self.genome:
        counts[de[1]] = counts.get(de[1], 0) + 1

    # Despite the name, positive values raise the fitness.
    penalties = 0
    if counts.get("0_hole", 0) > 10:
        penalties += 1
    # Platforms add verticality to the level.
    if counts.get("1_platform", 0) > 5:
        penalties += 2
    # We like coins.
    # NOTE(review): the tag is spelled "3_coin" (singular, like every other
    # tag in this function); the former "3_coins" could never match and is
    # assumed to be a typo -- confirm against the design-element definitions.
    if counts.get("3_coin", 0) > 15:
        penalties += 3
    # We don't like stairs.
    if counts.get("6_stairs", 0) > 5:
        penalties -= 2
    # Pipes should be kept minimal.
    if counts.get("7_pipe", 0) > 3:
        penalties -= 2

    self._fitness = sum(
        coefficients[m] * measurements[m] for m in coefficients) + penalties
    return self
def calculate_fitness(self):
    """Compute this individual's fitness and cache it in ``self._fitness``.

    The fitness combines
    * the coefficient-weighted sum of ``metrics.metrics(self.to_level())``,
    * penalties for too many stairs or pipes, for more descending than
      ascending stairs and for every tall pipe,
    * a jump bonus derived from the ``meaningfulJumps`` measurement.

    :return: ``self``, so calls can be chained
    """
    measurements = metrics.metrics(self.to_level())
    coefficients = dict(meaningfulJumpVariance=0.5,
                        negativeSpace=0.6,
                        pathPercentage=0.5,
                        emptyPercentage=0.6,
                        linearity=-0.5,
                        solvability=2.0,
                        decorationPercentage=0.0,
                        leniency=0.0,
                        meaningfulJumps=0.01,
                        jumps=0.0,
                        jumpVariance=0.0,
                        length=0.0)
    tall_pipes_coefficient = 0.5
    lackOfAscent_coefficient = 0.2

    # Single pass over the genome collecting everything the penalties need.
    num_stairs = 0
    num_pipes = 0
    num_tallpipes = 0
    sumAscent = 0
    sumDescent = 0
    for de in self.genome:
        if de[1] == "6_stairs":
            num_stairs += 1
            if de[3] == 1:
                sumAscent += 1
            else:
                sumDescent += 1
        elif de[1] == "7_pipe":
            num_pipes += 1
            if de[2] > 4:
                # Previously a bare `num_tallpipes` expression: the counter
                # was never incremented, so tall pipes were never penalized.
                num_tallpipes += 1

    penalties = 0
    # Too many stairs are unaesthetic.
    if num_stairs > 5:
        penalties -= 2
    # Penalize too many pipes.
    if num_pipes > 3:
        penalties -= 2
    # Penalize if there are more descending stairs than ascending stairs.
    if sumDescent > sumAscent:
        penalties -= lackOfAscent_coefficient
    # Penalize pipes that are too tall.
    penalties -= tall_pipes_coefficient * num_tallpipes

    # Reward levels with good jumps.
    jumpVal = 1
    if measurements['meaningfulJumps'] >= 0.0:
        jumpVal += (coefficients['meaningfulJumps'] *
                    measurements['meaningfulJumps'])

    self._fitness = sum(
        coefficients[m] * measurements[m]
        for m in coefficients) + penalties + jumpVal
    return self
def evaluating_doc_scores(docs, annotype, metric_name):
    """Average, per document, the workers' scores against the ground truth.

    Documents without ground truth for ``annotype`` or without markups of that
    type are skipped.  Workers with no spans, and scores that are NaN, do not
    contribute.  The stored value is ``np.mean`` of the remaining scores.

    :param docs: mapping of document id to document object
    :param annotype: annotation type to evaluate
    :param metric_name: metric identifier forwarded to ``metrics.metrics``
    :return: dict mapping document id to the mean score
    """
    doc_scores = {}
    for docid in docs:
        doc = docs[docid]
        gt = doc.get_groundtruth(annotype)
        if not gt or annotype not in doc.markups:
            continue

        valid_scores = []
        for spans in doc.markups[annotype].values():
            if len(spans) == 0:
                # The worker has no annotation for this doc
                continue
            value = metrics.metrics(spans, gt, doc.ntokens, metric_name)
            if not np.isnan(value):
                valid_scores.append(value)
        doc_scores[docid] = np.mean(valid_scores)
    return doc_scores
def calculate_fitness(self):
    """Compute this individual's fitness and cache it in ``self._fitness``.

    Fitness = coefficient-weighted sum of the measurements from
    ``metrics.metrics(self.to_level())`` plus adjustments based on the number
    of stairs, coins and pipes in the genome.

    :return: ``self``
    """
    measurements = metrics.metrics(self.to_level())
    coefficients = dict(meaningfulJumpVariance=0.5,
                        negativeSpace=0.6,
                        pathPercentage=0.5,
                        emptyPercentage=0.6,
                        linearity=-0.5,
                        solvability=2.0)

    def how_many(kind):
        # Number of design elements whose type tag (de[1]) equals `kind`.
        return sum(1 for de in self.genome if de[1] == kind)

    penalties = 0
    # Too many stairs are unaesthetic.
    if how_many("6_stairs") > 5:
        penalties -= 2
    # Incentivize lots of coins.
    if how_many("3_coin") > 10:
        penalties += 4
    # Heavily discourage too many pipes.
    if how_many("7_pipe") > 6:
        penalties -= 4

    weighted = sum(coefficients[name] * measurements[name]
                   for name in coefficients)
    self._fitness = weighted + penalties
    return self
def evaluating_worker_per_doc(docs, annotype, metric_name):
    """Summarize every worker's scores against the ground truth.

    For each document that has both ground truth and markups of ``annotype``,
    every worker with at least one span is scored with ``metrics.metrics``;
    NaN scores are dropped.

    :param docs: mapping of document id to document object
    :param annotype: annotation type to evaluate
    :param metric_name: metric identifier forwarded to ``metrics.metrics``
    :return: dict mapping worker id to ``dict(count=<number of scores>,
        score=<mean score>)``
    """
    collected = {}
    for docid in docs:
        doc = docs[docid]
        gt = doc.get_groundtruth(annotype)
        if not gt or annotype not in doc.markups:
            continue
        for wid, spans in doc.markups[annotype].items():
            if len(spans) == 0:
                # The worker has no annotation for this doc
                continue
            value = metrics.metrics(spans, gt, doc.ntokens, metric_name)
            if np.isnan(value):
                continue
            collected.setdefault(wid, []).append(value)

    worker_scores = {}
    for wid, values in collected.items():
        worker_scores[wid] = dict(count=len(values), score=np.mean(values))
    return worker_scores
def __init__(self, file_name, camp_dist):  #maybe folder name instead of file_name
    """Load the data file and create the helper objects used by this instance.

    :param file_name: path of the comma-separated file read with
        ``sp.genfromtxt`` (fields are read as 10-byte strings, ``'|S10'``)
    :param camp_dist: stored unchanged as ``self.camp_dist_array``
    """
    self.camp_dist_array = camp_dist
    # The parser always works on these fixed paths, independent of file_name.
    self.parser = parser('../data/xlsx/start.xlsx', '../data/csv/out.csv')
    # NOTE(review): presumably runs the parser so the csv exists before it is
    # read below -- confirm against the definition of ``pars``.
    self.pars()
    self.data = sp.genfromtxt(file_name, delimiter=',', dtype='|S10')
    self.metrics = metrics()
    self.batcher = batcher(self.data)
    self.tth = self.batcher.tth_create()
def select_features_number(classifier, number_features, fwd, fltg, X, Y):
    """Cross-validate ``classifier`` on features chosen by ``select_features``.

    A selector is built with ``select_features(classifier, number_features,
    fwd, fltg)``, fitted on the whole of ``X`` / ``Y`` and used to transform
    ``X``.  The classifier is then evaluated with 10-fold cross-validation and
    the error, efficiency, elapsed time and per-metric mean / standard
    deviation are printed.

    :param classifier: estimator exposing ``fit`` and ``predict``
    :param number_features: forwarded to ``select_features``
    :param fwd: forwarded to ``select_features``
    :param fltg: forwarded to ``select_features``
    :param X: 2-D feature array, indexable by arrays of row indices
    :param Y: target array, indexable by arrays of row indices
    :return: the fitted selector
    """
    n_folds = 10
    tiempo_i = time.time()
    Errores = np.ones(n_folds)
    Metrics = np.zeros((n_folds, 5))
    kf = KFold(n_splits=n_folds)

    sf = select_features(classifier, number_features, fwd, fltg)
    sf = sf.fit(X, Y)
    X_sf = sf.transform(X)

    for j, (train_index, test_index) in enumerate(kf.split(X_sf)):
        X_train, X_test = X_sf[train_index], X_sf[test_index]
        y_train, y_test = Y[train_index], Y[test_index]
        classifier.fit(X_train, y_train)
        y_pred = classifier.predict(X_test)
        # Score the fold once and reuse the result for both tables.
        fold_metrics = metrics(y_test, y_pred)
        Errores[j] = 1 - fold_metrics[0]
        Metrics[j, :] = fold_metrics

    print("\nError de validación aplicando SFS: " + str(np.mean(Errores)) +
          "+/-" + str(np.std(Errores)))
    print("\nEficiencia en validación aplicando SFS: " +
          str((1 - np.mean(Errores)) * 100) + "%")
    print("\nTiempo total de ejecución: " + str(time.time() - tiempo_i) +
          " segundos.")

    MetricsMean = meanMetrics(Metrics)
    MetricsStd = stdMetrics(Metrics)
    printMetrics(MetricsMean)
    print("\nDesviaciones Estandard")
    printMetrics(MetricsStd)
    return sf
def _single_user_test(self, history_events, predict_events):
    """Replay the events to predict one by one and score a prediction at each step.

    Falsy entries are dropped from both lists and the history starts as its
    last ``self.lastN`` entries.  At every step the history is trimmed to at
    most ``self.lastN`` entries, ``self.predict`` is called and the result is
    scored

    * against the next not-yet-reached ``revenue`` item (if any), appended to
      ``self.pred_next_purchase_metric``;
    * against the items of all events from the current one onwards, appended
      to ``self.pred_whole_day_metric``.

    The current event is then added to the history.

    :param history_events: iterable of ``"behavior:item"`` strings
    :param predict_events: iterable of ``"behavior:item"`` strings
    """
    history = [event for event in history_events if event][-self.lastN:]
    upcoming = [event for event in predict_events if event]

    # (position, item) of every purchase, in order of occurrence.
    pending_purchases = []
    for position, event in enumerate(upcoming):
        behavior, item = event.split(':', 1)
        if behavior == 'revenue':
            pending_purchases.append((position, item))

    for position, event in enumerate(upcoming):
        # Keep the history window bounded.
        while len(history) > self.lastN:
            del history[0]
        pred = self.predict(history, self.topN)

        # Predict the next purchase item.
        if pending_purchases:
            next_purchase = {pending_purchases[0][1]}
            purchase_scores = metrics(next_purchase, pred,
                                      ['HR', 'MRR', 'NDCG'])
            self.pred_next_purchase_metric.append(purchase_scores)

        # Predict the remaining items of the day.
        remaining = {e.split(':', 1)[1] for e in upcoming[position:]}
        day_scores = metrics(remaining, pred, ['P&R', 'MAP'])
        self.pred_whole_day_metric.append(day_scores[0] + [day_scores[1]])

        # Drop the purchase once its position has been reached.
        if pending_purchases and pending_purchases[0][0] <= position:
            pending_purchases.pop(0)

        # The current event becomes part of the history for the next step.
        history.append(event)
def select_features_filter_percentage(classifier, percentage, X, Y):
    """Cross-validate ``classifier`` on the top ``percentage`` % of features.

    Features are ranked with ``mutual_info_classif`` through
    ``SelectPercentile`` fitted on the whole of ``X`` / ``Y``.  The classifier
    is then evaluated with 10-fold cross-validation on the reduced data and
    the error, efficiency, elapsed time and per-metric mean / standard
    deviation are printed.

    :param classifier: estimator exposing ``fit`` and ``predict``
    :param percentage: percentile of features to keep
    :param X: 2-D feature array, indexable by arrays of row indices
    :param Y: target array, indexable by arrays of row indices
    :return: the fitted ``SelectPercentile`` object
    """
    n_folds = 10
    tiempo_i = time.time()
    Errores = np.ones(n_folds)
    Metrics = np.zeros((n_folds, 5))
    kf = KFold(n_splits=n_folds)

    filter_method = SelectPercentile(mutual_info_classif,
                                     percentile=percentage)
    filter_method.fit(X, Y)
    X_sf = filter_method.transform(X)

    for j, (train_index, test_index) in enumerate(kf.split(X_sf)):
        X_train, X_test = X_sf[train_index], X_sf[test_index]
        y_train, y_test = Y[train_index], Y[test_index]
        classifier.fit(X_train, y_train)
        y_pred = classifier.predict(X_test)
        # Score the fold once and reuse the result for both tables.
        fold_metrics = metrics(y_test, y_pred)
        Metrics[j, :] = fold_metrics
        Errores[j] = 1 - fold_metrics[0]

    print("\nError de validación aplicando at " + str(percentage) + "%: " +
          str(np.mean(Errores)) + "+/-" + str(np.std(Errores)))
    print("\nEficiencia en validación aplicando at " + str(percentage) +
          "%: " + str((1 - np.mean(Errores)) * 100) + "%")
    print("\nTiempo total de ejecución: " + str(time.time() - tiempo_i) +
          " segundos.")

    MetricsMean = meanMetrics(Metrics)
    MetricsStd = stdMetrics(Metrics)
    printMetrics(MetricsMean)
    print("\nDesviaciones Estandard")
    printMetrics(MetricsStd)
    return filter_method
def fit(self,X,Y):
    """Fit the model to the points ``(X, Y)``.

    Stores the data, derives a centre point and length from ``radius(X, Y)``,
    computes a step from them, builds the ``Xfib`` / ``Yfib`` coordinates
    around the centre, derives areas and check points from those and finally
    runs ``self.regressions()``.

    :param X: x values of the training points
    :param Y: y values of the training points
    """
    self.X,self.Y = X,Y
    # NOTE(review): metricObj is never used below -- confirm whether the
    # constructor call is needed for a side effect.
    metricObj = metrics(self.X,self.Y)
    # R is indexed as (point, length) below.
    R = radius(X,Y)
    self.RadiusPoint = R[0]
    # Each coordinate is converted to float128 and its first element taken,
    # so the coordinates are assumed to be array-like -- TODO confirm.
    self.RadX,self.RadY = np.float128(self.RadiusPoint[0])[0],np.float128(self.RadiusPoint[1])[0]
    self.RadiusPoint = (self.RadX,self.RadY)
    self.radiusLength = R[1]
    self.step = self.findFuncStep(self.RadiusPoint,self.radiusLength)
    self.Xfib,self.Yfib = self.fibOBJ.cordinateEachAngle(self.step,originCordinates=(float(self.RadiusPoint[0]),float(self.RadiusPoint[1])))
    self.areas = self.Areas(self.X,self.Y,self.Xfib,self.Yfib)
    self.XY_of_CheckPoints = self.get_XY_of_CheckPoints(self.areas)
    self.regressions()
def get_distance(self,x,y):
    """Return the distance between ``x`` and ``y`` for the configured metric.

    ``self.metric`` selects which method of ``metrics(x, y)`` is called:
    ``"euclidean"``, ``"manhattan"``, ``"minkowski"`` or ``"chebyhev"``.

    :param x: first operand, forwarded to ``metrics``
    :param y: second operand, forwarded to ``metrics``
    :return: the selected distance, or ``None`` for any other metric name
    """
    data = metrics(x,y)
    chosen = self.metric
    if chosen == "euclidean":
        return data.euclideanDistance()
    if chosen == "manhattan":
        return data.manhattanDistance()
    if chosen == "minkowski":
        return data.minkowskiDistance()
    if chosen == "chebyhev":
        return data.chebyhevDistance()
    return None
def calculate_fitness(self):
    """Compute this individual's fitness and cache it in ``self._fitness``.

    Fitness = coefficient-weighted sum of the measurements from
    ``metrics.metrics(self.to_level())`` plus penalties for unbalanced element
    counts, for holes directly followed by a ``"-"`` cell in row 15 of the
    level, and for elements placed in the first two columns.

    :return: ``self``
    """
    measurements = metrics.metrics(self.to_level())
    coefficients = dict(meaningfulJumpVariance=0.5,
                        negativeSpace=0.6,
                        pathPercentage=0.5,
                        emptyPercentage=0.6,
                        linearity=-0.5,
                        solvability=2.0)

    # Count the design elements per type tag (de[1]) in a single pass.
    counts = {}
    for de in self.genome:
        counts[de[1]] = counts.get(de[1], 0) + 1

    penalties = 0
    if counts.get("6_stairs", 0) > 5:  # too many stairs
        penalties -= 2
    if counts.get("3_coin", 0) < 2:  # too few coins
        penalties -= 2
    if counts.get("0_hole", 0) < 1:  # no gaps/holes
        penalties -= 2
    if counts.get("5_qblock", 0) > 6:  # too many qblocks
        penalties -= 2
    if counts.get("2_enemy", 0) > 6:  # too many enemies
        penalties -= 2
    if counts.get("2_enemy", 0) < 2:  # not enough enemies
        penalties -= 2

    # Connected holes: the cell right after a hole (x + width) in row 15 is
    # "-".  The level is built once instead of once per hole.
    holes = [de for de in self.genome if de[1] == "0_hole"]
    if holes:
        level = self.to_level()
        for hole in holes:
            try:
                followed_by_gap = level[15][hole[0] + hole[2]] == "-"
            except IndexError:
                # The hole ends at the edge of the level: nothing to check.
                continue
            if followed_by_gap:
                penalties -= 6

    # Cluttering around mario: anything in columns 0 and 1 that is not a
    # "4_block" with de[2] == 15 is penalized.
    for de in self.genome:
        if de[0] in (0, 1) and (de[1] != "4_block" or de[2] != 15):
            penalties -= 4

    self._fitness = sum(
        coefficients[m] * measurements[m] for m in coefficients) + penalties
    return self
def calculate_fitness(self):
    """Compute this individual's fitness and cache it in ``self._fitness``.

    The fitness is the coefficient-weighted sum of the measurements returned
    by ``metrics.metrics(self.to_level())``; only the measurement keys listed
    in the coefficient table are used.

    :return: ``self``
    """
    measurements = metrics.metrics(self.to_level())
    # Default fitness function: an arbitrary combination of a few criteria.
    coefficients = {
        "meaningfulJumpVariance": 0.5,
        "negativeSpace": 0.6,
        "pathPercentage": 0.5,
        "emptyPercentage": 0.6,
        "linearity": -0.5,
        "solvability": 2.0,
    }
    fitness = 0
    for name, weight in coefficients.items():
        fitness += weight * measurements[name]
    self._fitness = fitness
    return self
def story_model(features, labels, mode, params):
    """Model function building a two-hidden-layer DNN classifier.

    The network is an input layer built from ``params['feature_columns']``,
    two dense ReLU layers of 512 units and a linear layer with
    ``params['n_classes']`` outputs.

    :param features: feature tensors passed to ``tf.feature_column.input_layer``
    :param labels: labels used for the loss and the evaluation metrics
    :param mode: one of the ``tf.estimator.ModeKeys`` values
    :param params: dict with the keys ``'n_classes'`` and ``'feature_columns'``
    :return: the ``tf.estimator.EstimatorSpec`` for the requested mode
    """
    n_classes = params['n_classes']

    # DNN with two hidden layers.
    net = tf.feature_column.input_layer(features, params['feature_columns'])
    for _ in range(2):
        net = tf.layers.dense(net, units=512, activation=tf.nn.relu)

    # Compute logits (1 per class) and the predicted class.
    logits = tf.layers.dense(net, n_classes, activation=None)
    predicted_classes = tf.argmax(logits, 1)

    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(
            mode,
            predictions={
                'class_ids': predicted_classes[:, tf.newaxis],
                'probabilities': tf.nn.softmax(logits),
                'top_10': tf.nn.top_k(logits, k=10)[1],
                'ranked': tf.nn.top_k(logits, k=25)[1],
                'logits': logits,
            })

    # Loss and evaluation metrics are shared by EVAL and TRAIN.
    loss = tf.losses.sparse_softmax_cross_entropy(labels=labels,
                                                  logits=logits)
    eval_metric_ops = m.metrics(labels, predicted_classes, logits)

    if mode == tf.estimator.ModeKeys.EVAL:
        return tf.estimator.EstimatorSpec(mode,
                                          loss=loss,
                                          eval_metric_ops=eval_metric_ops)

    # Only training is left.
    assert mode == tf.estimator.ModeKeys.TRAIN
    optimizer = tf.train.AdagradOptimizer(learning_rate=0.1)
    train_op = optimizer.minimize(loss,
                                  global_step=tf.train.get_global_step())
    return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)
def worker_scores_doc_gt_helper(doc, annotype, scoretype, pruned_workers):
    """Score every non-pruned worker of ``doc`` against the ground truth.

    :param doc: document exposing ``markups``, ``get_groundtruth`` and ``ntokens``
    :param annotype: annotation type to evaluate
    :param scoretype: metric identifier forwarded to ``metrics.metrics``
    :param pruned_workers: container of worker ids to leave out
    :return: dict mapping worker id to score; empty when the document has no
        ground truth for ``annotype``
    """
    markups = doc.markups[annotype]
    kept_workers = [wid for wid in markups.keys() if wid not in pruned_workers]

    gt_spans = doc.get_groundtruth(annotype)
    if not gt_spans:
        return {}

    worker_scores = {}
    for wid in kept_workers:
        worker_scores[wid] = metrics.metrics(markups[wid], gt_spans,
                                             doc.ntokens, scoretype)
    return worker_scores
def calculate_fitness(self):
    """Compute this individual's fitness and cache it in ``self._fitness``.

    The fitness is the coefficient-weighted sum of the measurements returned
    by ``metrics.metrics(self.to_level())``.

    :return: ``self``
    """
    measurements = metrics.metrics(self.to_level())
    coefficients = {
        # increased weight of jump variance to have more interesting levels
        "meaningfulJumpVariance": 1.0,
        "negativeSpace": 0.6,
        "pathPercentage": 0.5,
        "emptyPercentage": 0.6,
        "linearity": -0.5,
        "solvability": 2.0,
        # added to emphasize maps with more jumps
        "jumps": 0.02,
    }
    total = 0
    for name, weight in coefficients.items():
        total += weight * measurements[name]
    self._fitness = total
    return self
def calculate_fitness(self):
    """Compute this individual's fitness and cache it in ``self._fitness``.

    Fitness = coefficient-weighted sum of the measurements from
    ``metrics.metrics(self.to_level())`` plus penalties for too many stairs,
    for stairs near the start of the level and for too many pipes.

    :return: ``self``
    """
    measurements = metrics.metrics(self.to_level())
    coefficients = dict(
        meaningfulJumpVariance=0.5,
        negativeSpace=0.6,
        pathPercentage=0.5,
        emptyPercentage=0.6,
        linearity=-0.5,
        solvability=2.0
    )

    stairs = [de for de in self.genome if de[1] == "6_stairs"]
    pipe_count = sum(1 for de in self.genome if de[1] == "7_pipe")

    penalties = 0
    # Too many stairs are unaesthetic.
    if len(stairs) > 3:
        penalties -= 2
    # Stairs in front of mario: x below 20 and de[2] above height - 3.
    for stair in stairs:
        if stair[0] < 20 and stair[2] > height - 3:
            penalties -= 4
    if pipe_count > 3:
        penalties -= 2

    weighted = sum(coefficients[name] * measurements[name]
                   for name in coefficients)
    self._fitness = weighted + penalties
    return self
def runBot(self, r):
    """Answer up to 25 inbox mentions with a bot-probability estimate.

    For every mention the author of the parent item is looked up, the
    author's data is collected and aggregated through a fresh ``metrics``
    object, column 1 of the prediction is formatted and a reply is posted on
    the mention.

    :param r: client object exposing ``inbox.mentions``; it is also forwarded
        to ``getAuthorData``
    """
    logging.info('Running...')
    for mention in r.inbox.mentions(limit=25):
        logging.info('Comment found')
        logging.info('Comment.id is: ' + mention.id)

        met = metrics()
        parentAuth = mention.parent().author
        features = met.aggregateMetrics(met.getAuthorData(r, parentAuth))

        # Keep column 1 of the prediction and strip the array brackets from
        # its text form.
        prediction = self.__makePrediction(features)[:, 1]
        botProbability = str(prediction).replace('[', '').replace(']', '')

        mention.reply("User " + str(parentAuth) +
                      " is a bot with a probability of " + botProbability +
                      "0%")
def radius(X,Y,metric = "minkowski"):#Handle that metric parameter
    """Find the point whose largest distance to any other point is smallest.

    For every point ``(x0, y0)`` the Minkowski distance (as computed by
    ``metrics(...).minkowskiDistance()``) to each point is evaluated and the
    largest one is kept; the point with the smallest such value is returned.

    :param X: x values of the points
    :param Y: y values of the points, paired with ``X`` by position
    :param metric: currently ignored; the Minkowski distance is always used
    :return: ``((x0, y0), distance)`` for the selected point, or ``None``
        (after printing ``X`` and ``Y``) when there are no points
    """
    radiusList = []
    for x0, y0 in zip(X, Y):
        farthest = 0
        for x, y in zip(X, Y):
            dist = metrics(X=(x0, y0), Y=(x, y)).minkowskiDistance()
            if dist > farthest:
                farthest = dist
        radiusList.append(((x0, y0), farthest))

    # Explicit empty-input handling instead of a bare ``except`` around
    # ``min``, which also hid unrelated errors.
    if not radiusList:
        print("X :",X)
        print("Y :",Y)
        return None
    return min(radiusList, key=lambda element: element[1])
def mdr_model(train_in, train_out, test_in, test_out, train_profile, test_profile, configs): nnnet = network(configs) ################ # train and test loss_list = [] with tf.Session() as sess: batchgen = Batch(train_in, train_out, train_profile) sess.run(tf.global_variables_initializer()) for ep in range(configs["epoches"]): x, y, prof_batch = batchgen.next(configs["batch_size"]) fd = { nnnet["flow_input"]: x[:, :, :configs["flow_features"]], nnnet["stock_input"]: x[:, :, configs["flow_features"]:], nnnet["profile_input"]: prof_batch, nnnet["decoder_targets"]: y } _, runloss, train_res = sess.run( [nnnet["opt"], nnnet["loss"], nnnet["train_res"]], fd) loss_list.append(runloss) ############################## test testdata = Batch(test_in, test_out, test_profile) ground_truth = [] predictions = [] for _ in range(int(test_in.shape[0] / configs["batch_size"])): x_test, y_test, prof_test = testdata.next(configs["batch_size"]) fd = { nnnet["flow_input"]: x_test[:, :, :configs["flow_features"]], nnnet["stock_input"]: x_test[:, :, configs["flow_features"]:], nnnet["profile_input"]: prof_test, nnnet["decoder_targets"]: y_test } ddd, alpha_res = sess.run([nnnet["predictions"], nnnet["alphas"]], fd) ground_truth.append(y_test) predictions.append(ddd) ground_truth = np.concatenate(ground_truth) predictions = np.concatenate(predictions) return metrics(ground_truth, predictions)
def worker_scores_doc_helper(doc, annotype, scoretype, pruned_workers, max_workers=DEFAULT_MAX_WORKERS):
    """Score each worker of ``doc`` by mean pairwise agreement with the others.

    Pruned workers are left out.  If more than ``max_workers`` remain, a
    random subset of that size is used (sorted by worker id).  Every unordered
    pair of workers is scored once with ``metrics.metrics`` and the score is
    credited to both workers.

    :param doc: document exposing ``markups``, ``ntokens`` and ``docid``
    :param annotype: annotation type to evaluate
    :param scoretype: metric identifier forwarded to ``metrics.metrics``
    :param pruned_workers: container of worker ids to leave out
    :param max_workers: upper bound on the number of workers compared
    :return: dict mapping worker id to the mean pairwise score; empty when
        fewer than two workers are available
    """
    markups = doc.markups[annotype]
    workers = [w for w in markups.keys() if w not in pruned_workers]
    nworkers = len(workers)
    if nworkers > max_workers:
        random.shuffle(workers)
        workers = workers[:max_workers]
        workers.sort()
        nworkers = max_workers

    worker_scores = {}
    if nworkers <= 1:
        # Parenthesized single-argument form: valid and identical on
        # Python 2 and Python 3.
        print("[Warn] Only one worker for doc {0}, do not calculate worker score.".format(
            doc.docid))
        return worker_scores

    for wid in workers:
        worker_scores[wid] = []

    # j always starts after i, so every unordered pair is visited once.
    for i in range(nworkers - 1):
        w1_spans = markups[workers[i]]
        for j in range(i + 1, nworkers):
            w2_spans = markups[workers[j]]
            score = metrics.metrics(w1_spans, w2_spans, doc.ntokens,
                                    scoretype)
            worker_scores[workers[i]].append(score)
            worker_scores[workers[j]].append(score)

    for wid in worker_scores:
        worker_scores[wid] = np.mean(worker_scores[wid])
    return worker_scores
def analyze(directory, use_cache=True):
    """Return the metrics of all ``.dot`` files in ``directory``, grouped by ``n``.

    ``n`` is the integer before the first ``_`` in a file name.  Results are
    cached in ``<directory>/metrics.json``; with ``use_cache`` set and the
    cache present, the cached data is returned without recomputing.  When the
    groups do not all hold the same number of files a message is printed and
    the interpreter exits.

    :param directory: directory holding the ``.dot`` files and the cache
    :param use_cache: reuse the cache file if it exists; also forwarded to
        ``metrics`` for every file
    :return: dict mapping ``n`` (int) to the list of per-file metrics
    """
    cache = os.path.join(directory, "metrics.json")
    if use_cache and os.path.isfile(cache):
        print('using cached metrics for', directory)
        with open(cache, "r") as fp:
            cached = json.load(fp)
        # JSON object keys are strings: convert them back to int.
        return {int(key): value for key, value in cached.items()}

    print('computing metrics for', directory)
    # Imported here because it takes a considerable amount of time (>1sec).
    from metrics import metrics

    # Collect the dot files per n.
    dots_by_n = {}
    for name in os.listdir(directory):
        if not name.endswith('.dot'):
            continue
        group = int(name.split('_')[0])
        dots_by_n.setdefault(group, []).append(os.path.join(directory, name))

    # Every n must come with the same number of samples.
    samples_per_n = len(next(iter(dots_by_n.values())))
    if any(len(files) != samples_per_n for files in dots_by_n.values()):
        print("not all n have same number of samples!")
        exit()

    # Calculate the metrics.
    all_data = {}
    for group, files in dots_by_n.items():
        all_data[group] = [metrics(path, use_cache) for path in files]

    # Update the cache.
    with open(cache, "w") as fp:
        json.dump(all_data, fp)
    return all_data
def calculate_fitness(self):
    """Compute this individual's fitness and cache it in ``self._fitness``.

    Fitness = coefficient-weighted sum of the measurements from
    ``metrics.metrics(self.to_level())`` plus adjustments for stairs,
    one-wide gaps and coins.

    :return: ``self``
    """
    measurements = metrics.metrics(self.to_level())
    coefficients = dict(meaningfulJumpVariance=0.5,
                        negativeSpace=0.6,
                        pathPercentage=0.5,
                        emptyPercentage=0.6,
                        linearity=-0.5,
                        solvability=2.0)

    # One pass over the genome collects all the counts used below.
    stairs_count = 0
    one_wide_gap_count = 0
    coin_count = 0
    for de in self.genome:
        kind = de[1]
        if kind == "6_stairs":
            stairs_count += 1
        # NOTE(review): the tag is spelled "0_hole" (singular, like "3_coin"
        # in this function); the former "0_holes" could never match and is
        # assumed to be a typo -- confirm against the design-element
        # definitions.
        elif kind == "0_hole":
            if de[2] == 1:
                one_wide_gap_count += 1
        elif kind == "3_coin":
            coin_count += 1

    penalties = 0
    # Too many stairs are unaesthetic.
    if stairs_count > 5:
        penalties -= 2
    # Penalize 1 width gaps: a flat penalty for more than one, plus 0.1 per
    # gap capped at 1.
    if one_wide_gap_count > 1:
        penalties -= 0.5
    penalties -= min(1, one_wide_gap_count * 0.1)
    # Coins are fun: reward each coin, but penalize 30 or more.
    penalties += coin_count * 0.05 if coin_count < 30 else -1

    self._fitness = sum(
        coefficients[m] * measurements[m] for m in coefficients) + penalties
    return self
def compose_estimator(mode, labels, logits, probs, predictions, params, lengths=None):
    """Build the ``tf.estimator.EstimatorSpec`` for the given mode.

    When ``params`` enables ``CRF``, the predictions are replaced by those of
    ``crf_predictions`` and the loss comes from ``crf_loss``; otherwise the
    sparse softmax cross entropy is used.

    :param mode: one of the ``tf.estimator.ModeKeys`` values
    :param labels: labels, converted to ids with ``lookup`` and
        ``params.label_vocab_file``
    :param logits: model logits
    :param probs: probabilities returned under ``'prob'`` in PREDICT mode
    :param predictions: predicted ids (overridden when CRF is enabled)
    :param params: parameter object supporting ``in`` and attribute access
    :param lengths: sequence lengths, used by the CRF functions only
    :return: the ``EstimatorSpec`` for ``mode``
    """
    use_crf = 'CRF' in params and params.CRF
    if use_crf:
        predictions, transition_matrix = crf_predictions(
            logits, lengths, params.classes_count)

    # mode: PREDICT
    if mode == tf.estimator.ModeKeys.PREDICT:
        output = {
            'class': rev_lookup(predictions, params.label_vocab_file),
            'prob': probs
        }
        return tf.estimator.EstimatorSpec(mode, predictions=output)

    # The loss is needed for both TRAIN and EVAL.
    label_ids = lookup(labels, params.label_vocab_file)
    if use_crf:
        loss = crf_loss(logits, label_ids, lengths, transition_matrix)
    else:
        loss = tf.losses.sparse_softmax_cross_entropy(label_ids, logits)

    # mode: TRAIN
    if mode == tf.estimator.ModeKeys.TRAIN:
        optimizer = tf.train.AdamOptimizer(params.learning_rate)
        train_op = optimizer.minimize(
            loss, global_step=tf.train.get_global_step())
        return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)

    # mode: EVAL
    return tf.estimator.EstimatorSpec(
        mode, loss=loss, eval_metric_ops=metrics(label_ids, predictions))
def evaluate(self, users, items, sales_history, user_recommendations, controlId):
    """Simulate one round of choices under the recommendations and return the EPC metric.

    Works on copies of the sales history, of ``items.hasBeenRecommended`` and
    of ``users.Awareness``; the originals are not modified here.  First the
    awareness copy is updated for every active user with recommendations,
    then each of those users picks items through ``users.choiceModule`` and
    the picks are added to the new sales history.  Both histories are finally
    passed to ``metrics.metrics``.

    :param users: object exposing ``activeUserIndeces``, ``Awareness``,
        ``choiceModule`` and ``sessionSize``
    :param items: object exposing ``hasBeenRecommended``, ``ItemsFeatures``
        and ``ItemsDistances``
    :param sales_history: array indexed as ``[user, item]``
    :param user_recommendations: mapping of user to recommended item indices
    :param controlId: array indexed as ``[user, :]``, forwarded to the choice module
    :return: the ``"EPC"`` entry of the metrics result
    """
    ### from the metrics
    sales_history_old = sales_history.copy()
    sales_history_new = sales_history.copy()
    # NOTE(review): prior_recommendations is updated below but never read.
    prior_recommendations = np.copy(items.hasBeenRecommended)
    awareness = copy.deepcopy(users.Awareness)
    # Pass 1: update the awareness copy with the recommendations.
    for user in users.activeUserIndeces:
        Rec=np.array([-1])
        if user not in user_recommendations.keys():
            self.printj(" -- Nothing to recommend -- to user ",user)
            continue
        Rec = user_recommendations[user]
        prior_recommendations[Rec] = 1
        awareness[user, Rec] = 1
        # If recommended but previously purchased, minimize the awareness
        # NOTE(review): np.where(...)[0] yields positions within Rec, not item
        # indices, so this zeroes awareness of items 0..len(Rec)-1 rather than
        # of the purchased recommended items -- confirm whether
        # Rec[np.where(...)[0]] was intended.
        awareness[user, np.where(sales_history_old[user,Rec]>0)[0] ] = 0
    # Pass 2: let every user with recommendations choose items.
    for user in users.activeUserIndeces:
        Rec=np.array([-1])
        if user not in user_recommendations.keys():
            self.printj(" -- Nothing to recommend -- to user ",user)
            continue
        Rec = user_recommendations[user]
        indecesOfChosenItems,indecesOfChosenItemsW = users.choiceModule(Rec, awareness[user,:], controlId[user,:], users.sessionSize(),)
        sales_history_new[user, indecesOfChosenItems] += 1
    metric = metrics.metrics(sales_history_old, user_recommendations, items.ItemsFeatures, items.ItemsDistances, sales_history_new)
    return metric["EPC"]
def index():
    """Store the current metrics in ``g`` and render ``index.html``."""
    current = {"metrics": metrics.metrics()}
    g.update(current)
    return render_template("index.html")
# Prepare the graph object `a`: threshold it, drop unconnected nodes and
# attach the spatial information from `parcelFile`.
a.applyThreshold()
a.removeUnconnectedNodes()
a.importSpatialInfo(parcelFile)

# total weights
# Sum of the absolute edge weights before the degenerative process.
weights = [mbt.np.absolute(a.G.edge[v[0]][v[1]]["weight"]) for v in a.G.edges()]
T_start = mbt.np.sum(weights)
print T_start

wtLossPC = 5.0 # five percent loss of connections at each iteration
wtLoss = T_start * (wtLossPC / 100)
# appVal is False for the first metrics call and True afterwards.
appVal = False
# range(3, 4) yields a single iteration.
for n in range(3, 4):
    print "doing degenerative process"
    # NOTE(review): weightloss is hard-coded to 0.05 instead of being derived
    # from wtLossPC -- confirm that the two are meant to stay in sync.
    a.degenerate(weightloss=0.05, weightLossLimit=wtLoss)
    a.reconstructAdjMat()
    print "getting metrics"
    metrics(
        a, appVal, degenName, pcLoss=str(wtLossPC)
    ) # something in the metrics is reinstating too many edges, including node 0 connected to everything
    appVal = True

# total weights
# Sum of the absolute edge weights after the degenerative process.
weights = [mbt.np.absolute(a.G.edge[v[0]][v[1]]["weight"]) for v in a.G.edges()]
T_final = mbt.np.sum(weights)
print T_final
# Elapsed time since `startTime`.
print (datetime.now() - startTime)
elif sys.argv[1] == 'svm_predict': model_filename = svm_model_filename # recording = recordings[randint(0,len(recordings)-1)] numGroundtruthIntervals, numDetectedIntervals, numCorrect = 0,0,0 number_recording = getRecordingNumber('TEST') for ii in number_recording: recording = recordings[ii] print 'evaluate %s' % recording sumNumGroundtruthIntervals, sumNumDetectedIntervals, sumNumCorrect = \ predict(textgrid_path,feature_path,scaler_filename,model_filename,recording,varin) numGroundtruthIntervals += sumNumGroundtruthIntervals numDetectedIntervals += sumNumDetectedIntervals numCorrect += sumNumCorrect HR, OS, FAR, F, R, deletion, insertion = \ metrics.metrics(numDetectedIntervals, numGroundtruthIntervals, numCorrect) print ('HR %.3f, OS %.3f, FAR %.3f, F %.3f, R %.3f, deletion %i, insertion %i, gt %i' % (HR, OS, FAR, F, R, deletion, insertion,numGroundtruthIntervals)) print ('gt %i, detected %i, correct %i' % (numGroundtruthIntervals,numDetectedIntervals,numCorrect)) # not used in the paper elif sys.argv[1] == 'gmm_cv': gmm_cv(fv_train,target_train,fv_train,target_train) elif sys.argv[1] == 'rf_cv': rf_cv(fv_train_transformed,target_train,fv_train_transformed,target_train) elif sys.argv[1] == 'rf_model': rf_model(fv_train, target_train, n_estimators=1000, max_depth=None, max_features='auto',
def predict():
    """Evaluate boundary detection on the test data with the stored SVM model.

    The test data is loaded with ``loadDataAll(var_names)`` and the model with
    ``joblib.load(svm_model_filename)``.  For every syllable of every song the
    detected voiced boundaries are filtered by the SVM prediction, merged with
    the unvoiced ones, reduced with ``boundaryReduction`` and compared with
    the ground truth via ``metrics.boundaryDetection``.  The counts are summed
    over all syllables, converted to scores by ``metrics.metrics`` and
    printed.

    Relies on the module-level names ``var_names``, ``svm_model_filename``,
    ``fs`` and ``varin``.
    """
    icdPatterns_test,voicedPatterns_test,index_vp_test,\
    f_s_test,f_vuv_s_test,spec_test,pho_s_test,\
    gtb_test,db_test,gtbv_test,dbv_test = loadDataAll(var_names)

    svm_model_object= joblib.load(svm_model_filename)

    sumNumGroundtruthBoundaries, sumNumDetectedBoundaries, sumNumCorrect = 0,0,0

    # Outer loop: songs; inner loop: syllables of the song.
    for ii in range(len(gtbv_test)):
        gtb_song = gtb_test[ii]
        db_song = db_test[ii]
        dbv_song = dbv_test[ii]
        voicedPatterns_song = voicedPatterns_test[ii]
        index_vp_song = index_vp_test[ii]
        f_vuv_s_song = f_vuv_s_test[ii]
        spec_song = spec_test[ii]
        pho_s_song = pho_s_test[ii]

        for jj in range(len(gtb_song)):
            print 'index test syllable ', jj, 'in total ', len(gtb_song)

            spec = spec_song[jj]
            pho_s = pho_s_song[jj]
            gtb = gtb_song[jj]
            db = db_song[jj]
            dbv = dbv_song[jj]
            # Detected boundaries that are not among the voiced ones.
            dbuv = np.setdiff1d(db,dbv)
            voicedPatterns = voicedPatterns_song[jj]

            if len(voicedPatterns):
                index_vp = np.array(index_vp_song[jj])
                target = svm_model_object.predict(voicedPatterns)
                # Keep the voiced boundaries whose pattern was predicted as 0.
                dbvc = dbv[index_vp[np.nonzero(1-target)]] # detected boundaries voiced correct
                dbc = np.hstack((dbuv,dbvc))
            else:
                dbc = dbuv

            # Scale by fs/hopsize, reduce, then scale back by hopsize/fs.
            dbc = dbc*fs/varin['hopsize']
            dbc = np.array(boundaryReduction(dbc.tolist()))*(varin['hopsize']/float(fs))

            if varin['plot']:
                print pho_s
                plotDetection(spec,gtb,dbc,varin)

            numDetectedBoundaries, numGroundtruthBoundaries, numCorrect = \
                metrics.boundaryDetection(groundtruthBoundaries=gtb,
                                          detectedBoundaries=dbc,
                                          tolerance=varin['tolerance'])

            sumNumGroundtruthBoundaries += numGroundtruthBoundaries
            sumNumDetectedBoundaries += numDetectedBoundaries
            sumNumCorrect += numCorrect

    HR, OS, FAR, F, R, deletion, insertion = \
        metrics.metrics(sumNumDetectedBoundaries, sumNumGroundtruthBoundaries, sumNumCorrect)

    print ('HR %.3f, OS %.3f, FAR %.3f, F %.3f, R %.3f, deletion %i, insertion %i' %
           (HR, OS, FAR, F, R, deletion, insertion))
    print ('ground truth %i, detected %i, correct %i' % (sumNumGroundtruthBoundaries,sumNumDetectedBoundaries,sumNumCorrect))
f, axarr = plt.subplots(2, sharex=True) axarr[0].pcolormesh(timestamps_spec, binFreqs, 20*np.log10(mXPlot+eps)) for b_gt in boundary_gt: axarr[0].axvline(b_gt) for zf in boundary_detected: axarr[1].axvline(zf) # axarr[1].plot(x_frame,target_test) plt.show() boundary_gt = np.array(boundary_gt)*(varin['hopsize']/float(fs)) boundary_detected = np.array(boundary_detected)*(varin['hopsize']/float(fs)) print boundary_gt, boundary_detected numDetectedBoundaries, numGroundtruthBoundaries, numCorrect = \ metrics.boundaryDetection(groundtruthBoundaries=boundary_gt, detectedBoundaries=boundary_detected, tolerance=varin['tolerance']) print numDetectedBoundaries, numGroundtruthBoundaries, numCorrect sumNumGroundtruthBoundaries += numGroundtruthBoundaries sumNumDetectedBoundaries += numDetectedBoundaries sumNumCorrect += numCorrect HR, OS, FAR, F, R, deletion, insertion = \ metrics.metrics(sumNumDetectedBoundaries, sumNumGroundtruthBoundaries, sumNumCorrect) print HR, OS, FAR, F, R, deletion, insertion, sumNumDetectedBoundaries, sumNumGroundtruthBoundaries, sumNumCorrect