def run(self, args):
    # Module-level deps: sys; local module feature_matrix.
    a = feature_matrix.FeatureMatrix().load(args.infile)

    if args.labels is True:
        # List the files stored in the matrix and quit.
        print "Stored", len(a.metadata.filename), "files"
        for f in a.metadata.filename:
            print f
        sys.exit()

    feature_names = a.metadata.feature.split()
    feature_names.sort()

    if args.features is True:
        # Print the (sorted) feature names as a header row, after a
        # sanity check against the data dimensions.
        if len(feature_names) != a.data.shape[1]:
            print "Feature names are inconsistent with data!"
            print feature_names
            print len(feature_names)
            print a.data.shape
            sys.exit()
        for f in feature_names:
            sys.stdout.write(f)
            if args.csv is True:
                sys.stdout.write(",")
            sys.stdout.write(" ")
        print "label"
    else:
        # Print one row per file: all feature values followed by the
        # class label. The label is ugly but pragmatic: it requires the
        # class name to be the first dot-separated token of the base
        # file name (e.g. "rock.track01.wav" -> "rock").
        for i in xrange(len(a.metadata.filename)):
            for j in xrange(a.data.shape[1] - 1):
                sys.stdout.write(str(a.data[i, j]))
                if args.csv is True:
                    sys.stdout.write(",")
                sys.stdout.write(" ")
            sys.stdout.write(str(a.data[i, -1]))
            sys.stdout.write(", ")
            sys.stdout.write(
                a.metadata.filename[i].split("/")[-1].split(".")[0])
            print ""
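# For reference (hypothetical values): with --csv, the header branch
# above prints "feat1, feat2, ..., label" and the data branch prints
# rows such as
#
#     0.12, 3.4, 0.56, rock
#
# where the trailing token is the class parsed from the file name.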
def run(self, args):
    # Module-level deps: local module feature_matrix.
    a = feature_matrix.FeatureMatrix().load(args.database)
    file_label_dict = self.label_list(open(args.labels, 'rb'))

    # Join features and labels into a single CSV string.
    output = ""

    # Title row: file name, one generic column per feature, class label.
    output += "filename,"
    for i in xrange(a.data.shape[1]):
        output += "F" + str(i) + ","
    output += "Class\n"

    # One row per file; the base name is used to look up the label.
    for i in xrange(len(a.metadata.filename)):
        fname = a.metadata.filename[i].split('/')[-1]
        output += fname + ","
        for j in xrange(a.data.shape[1]):
            output += str(a.data[i, j]) + ","
        output += str(file_label_dict[fname]) + "\n"

    print output
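# For reference, the join above produces output shaped like the
# following (values and labels are hypothetical; the "filename", "F<i>"
# and "Class" column names come from the code above):
#
#     filename,F0,F1,Class
#     track01.wav,0.12,3.4,rock
#     track02.wav,0.56,7.8,jazz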
def run(self):
    # Module-level deps: numpy, gc; local feature_matrix and
    # ModelTrainerInput.
    print "training model from train filelist: %s" % (
        self.params['general']['train_filelist'])

    # Load the aggregated feature matrix from the scratch directory.
    m = feature_matrix.FeatureMatrix()
    mf = open(self.params['general']['scratch_directory'] + "/" +
              self.params['feature_aggregation']['aggregated_output'])
    m = m.load(mf)
    mf.close()

    with open(self.params['general']['train_filelist']) as f:
        linhas = f.readlines()

    # Map each stored file name to its row in the feature matrix.
    files = dict()
    for i in xrange(len(m.metadata.filename)):
        files[m.metadata.filename[i]] = i

    # The filelist has one "<filename>\t<label>" pair per line.
    labels = []
    features = []
    for i in xrange(len(linhas)):
        filename = linhas[i].split("\t")[0].strip()
        label = linhas[i].split("\t")[1].strip()
        labels.append(label)
        features.append(m.data[files[filename]])

    features = numpy.array(features)
    files = None  # release the index dict before training
    trainer_input = ModelTrainerInput(features, labels)
    gc.collect()
    self.train(trainer_input)
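# For reference, a hypothetical train_filelist consistent with the
# tab-split parsing above: one "<filename><TAB><label>" pair per line,
# where <filename> must match an entry stored in the feature matrix
# (paths and labels below are made up):
#
#     /data/audio/track01.wav	rock
#     /data/audio/track02.wav	jazz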
def run(self, args):
    # Module-level deps: sys, numpy, scipy.stats; local modules track
    # and feature_matrix.
    o = track.FeatureTrack()
    o = o.load(args.trackfile)

    # Read the label file: one "<onset> <offset> <label>" triple per
    # line; both '_' and '-' act as label separators, and only the
    # first token is kept.
    onsets = []
    offsets = []
    labels = []
    with open(args.labelfile) as f:
        for line in f.readlines():
            L = line.split()
            onsets.append(float(L[0]))
            offsets.append(float(L[1]))
            L[2] = L[2].replace('_', '-')
            L[2] = L[2].replace('-', '+')
            labels.append(str(L[2].split('+')[0]))

    final_output = None
    final_filenames = [o.metadata.filename]

    if o.data.ndim == 1:
        o.data.shape = (o.data.size, 1)

    # Sort feature columns by name so the output order is deterministic.
    a = numpy.array(o.metadata.feature.split())
    i = a.argsort()
    ofs = o.metadata.sampling_configuration.ofs

    my_features = o.metadata.feature.split()
    my_features.sort()

    # Build the new (sorted) feature-name string.
    new_features = ""
    if args.mean is True:
        for feat in my_features:
            new_features = new_features + " " + "mean_" + feat
    if args.variance is True:
        for feat in my_features:
            new_features = new_features + " " + "var_" + feat
    if args.slope is True:
        for feat in my_features:
            new_features = new_features + " " + "slope_" + feat
    if args.limits is True:
        for feat in my_features:
            new_features = new_features + " " + "max_" + feat
        for feat in my_features:
            new_features = new_features + " " + "argmax_" + feat
        for feat in my_features:
            new_features = new_features + " " + "min_" + feat
        for feat in my_features:
            new_features = new_features + " " + "argmin_" + feat
    new_features = new_features.strip()

    if args.csv is True:
        sys.stdout.write(new_features.replace(' ', ','))
        sys.stdout.write(',LABEL')
        sys.stdout.write('\n')

    for d in xrange(len(onsets)):
        out = numpy.array([])

        # Frame range covered by this segment (at least two frames).
        minN = int(onsets[d] * float(ofs))
        maxN = int(offsets[d] * float(ofs))
        if maxN <= (minN + 1):
            maxN = minN + 2

        if args.mean is True:
            out = numpy.hstack((out, o.data[minN:maxN, :].mean(axis=0)[i]))
        if args.variance is True:
            out = numpy.hstack((out, o.data[minN:maxN, :].var(axis=0)[i]))
        if args.slope is True:
            # Linear-regression slope of each feature column. A separate
            # loop variable is used so the argsort index `i` is not
            # clobbered (the original reused `i`); the slopes are then
            # reordered with `[i]` to match the sorted feature names.
            lindata = numpy.zeros(o.data.shape[1])
            for col in xrange(o.data.shape[1]):
                lindata[col] = scipy.stats.linregress(
                    o.data[minN:maxN, col],
                    range(o.data[minN:maxN, :].shape[0]))[0]
            out = numpy.hstack((out, lindata[i]))
        if args.limits is True:
            # Max/argmax/min/argmin per column, mirroring the feature
            # names declared above (the original declared these names
            # but never computed the values, so --limits produced a
            # matrix inconsistent with its metadata).
            seg = o.data[minN:maxN, :]
            out = numpy.hstack((out, seg.max(axis=0)[i]))
            out = numpy.hstack(
                (out, seg.argmax(axis=0)[i] / float(seg.shape[0])))
            out = numpy.hstack((out, seg.min(axis=0)[i]))
            out = numpy.hstack(
                (out, seg.argmin(axis=0)[i] / float(seg.shape[0])))

        if args.csv is True:
            for k in xrange(len(out)):
                sys.stdout.write(str(out[k]))
                sys.stdout.write(",")
            sys.stdout.write(labels[d])
            sys.stdout.write('\n')

        if final_output is None:
            final_output = out
        else:
            final_output = numpy.vstack((final_output, out))

    p = feature_matrix.FeatureMatrix()
    p.data = final_output.copy()
    if args.normalize:
        # Z-score each column; the small floor avoids division by zero.
        std_p = p.data.std(axis=0)
        p.data = (p.data - p.data.mean(axis=0)) / \
            numpy.maximum(10 ** (-6), std_p)
    p.metadata.sampling_configuration = o.metadata.sampling_configuration
    p.metadata.feature = new_features
    p.metadata.filename = final_filenames
    p.save(args.outfile)
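# A minimal sketch of the argparse wiring this command expects. The
# flag names are inferred from the args.* attributes used in run()
# above; the parser itself is hypothetical and would live in the
# command-dispatch code of the real code base.
import argparse

def make_parser():
    parser = argparse.ArgumentParser(
        description="Per-segment statistics from a feature track")
    parser.add_argument("trackfile")   # saved FeatureTrack
    parser.add_argument("labelfile")   # "<onset> <offset> <label>" lines
    parser.add_argument("outfile")     # output FeatureMatrix
    parser.add_argument("--mean", action="store_true")
    parser.add_argument("--variance", action="store_true")
    parser.add_argument("--slope", action="store_true")
    parser.add_argument("--limits", action="store_true")
    parser.add_argument("--csv", action="store_true")
    parser.add_argument("--normalize", action="store_true")
    return parser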
def stats(self, feature_tracks, mean=False, variance=False, delta=False,
          acceleration=False, slope=False, limits=False, csv=False,
          normalize=False):
    # Module-level deps: sys, numpy, scipy.stats; local feature_matrix;
    # `delta_stat` is assumed to be the module's delta helper, imported
    # elsewhere in this file.
    final_output = None
    final_filenames = []

    for o in feature_tracks:
        # Sort feature columns by name so the output order is
        # deterministic across tracks.
        a = numpy.array(o.metadata.feature.split())
        i = a.argsort()

        final_filenames.append(o.metadata.filename)

        if o.data.ndim == 1:
            o.data.shape = (o.data.size, 1)

        out = numpy.array([])

        if delta is True:
            # Mean of the first-order delta of each feature.
            d = numpy.mean(delta_stat(o.data, order=1, axis=0), axis=0)[i]
            out = numpy.hstack((out, d))
        if acceleration is True:
            # Mean of the second-order delta of each feature.
            d = numpy.mean(delta_stat(o.data, order=2, axis=0), axis=0)[i]
            out = numpy.hstack((out, d))
        if mean is True:
            out = numpy.hstack((out, o.data.mean(axis=0)[i]))
        if variance is True:
            out = numpy.hstack((out, o.data.var(axis=0)[i]))
        if slope is True:
            # Linear-regression slope of each feature column. A separate
            # loop variable keeps the argsort index `i` intact (the
            # original reused `i`, which corrupted the `limits` block
            # below); the slopes are reordered with `[i]` to match the
            # sorted feature names.
            lindata = numpy.zeros(o.data.shape[1])
            for col in xrange(o.data.shape[1]):
                lindata[col] = scipy.stats.linregress(
                    o.data[:, col], range(o.data.shape[0]))[0]
            out = numpy.hstack((out, lindata[i]))
        if limits is True:
            out = numpy.hstack((out, o.data.max(axis=0)[i]))
            out = numpy.hstack(
                (out, o.data.argmax(axis=0)[i] / float(o.data.shape[0])))
            out = numpy.hstack((out, o.data.min(axis=0)[i]))
            out = numpy.hstack(
                (out, o.data.argmin(axis=0)[i] / float(o.data.shape[0])))

        if csv is True:
            # Write the feature vector as one CSV row. The original
            # reshaped `out` to 2-D before this loop, so it printed the
            # entire row as a single field; iterating over the flat
            # vector restores the intended output.
            for k in xrange(out.size - 1):
                sys.stdout.write(str(out[k]))
                sys.stdout.write(",")
            sys.stdout.write(str(out[-1]))
            sys.stdout.write('\n')

        out.shape = (1, out.size)

        if final_output is None:
            final_output = out
        else:
            final_output = numpy.vstack((final_output, out))

    # Dealing with feature metadata: build the new (sorted) feature-name
    # string to match the column order produced above.
    my_features = o.metadata.feature.split()
    my_features.sort()
    new_features = ""
    if delta is True:
        for feat in my_features:
            new_features = new_features + " " + "delta_" + feat
    if acceleration is True:
        for feat in my_features:
            new_features = new_features + " " + "accel_" + feat
    if mean is True:
        for feat in my_features:
            new_features = new_features + " " + "mean_" + feat
    if variance is True:
        for feat in my_features:
            new_features = new_features + " " + "var_" + feat
    if slope is True:
        for feat in my_features:
            new_features = new_features + " " + "slope_" + feat
    if limits is True:
        for feat in my_features:
            new_features = new_features + " " + "max_" + feat
        for feat in my_features:
            new_features = new_features + " " + "argmax_" + feat
        for feat in my_features:
            new_features = new_features + " " + "min_" + feat
        for feat in my_features:
            new_features = new_features + " " + "argmin_" + feat

    p = feature_matrix.FeatureMatrix()
    p.data = final_output.copy()
    if normalize:
        # Z-score each column; the small floor avoids division by zero.
        std_p = p.data.std(axis=0)
        p.data = (p.data - p.data.mean(axis=0)) / \
            numpy.maximum(10 ** (-6), std_p)
    p.metadata.sampling_configuration = o.metadata.sampling_configuration
    p.metadata.feature = new_features
    p.metadata.filename = final_filenames
    return p
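# A minimal usage sketch for stats(), assuming `agg` is an instance of
# the enclosing class and that FeatureTrack.load() accepts an open file
# and returns the loaded track, as in the loaders above (file names are
# hypothetical):
#
#     tracks = []
#     for fname in ["track01.features", "track02.features"]:
#         t = track.FeatureTrack()
#         with open(fname) as fh:
#             t = t.load(fh)
#         tracks.append(t)
#
#     m = agg.stats(tracks, mean=True, variance=True, normalize=True)
#     m.save("dataset.fm")  # FeatureMatrix.save() as used in run() above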