def __process_data(self) -> None:
    """Build the image generators and load the labelled test images.

    Two ``ImageDataGenerator`` objects are created: one that rescales and
    augments (zoom, vertical flip) for the ``'train'`` split and one that
    only rescales for the ``'test'`` split. Both are handed to
    ``self.__generate_images`` and the results are stored on the instance.

    Afterwards every file below ``<self.folder>/test/NORMAL`` and
    ``<self.folder>/test/PNEUMONIA`` is read, resized to a square of side
    ``self.image_dimension``, stacked three times along the depth axis,
    converted to ``float32`` and divided by 255. The images end up in
    ``self.test_images`` and the class indices (0 for ``NORMAL``, 1 for
    ``PNEUMONIA``) in ``self.test_labels``.
    """
    augmenting_generator = ImageDataGenerator(
        rescale=1. / 255, zoom_range=0.3, vertical_flip=True)
    plain_generator = ImageDataGenerator(rescale=1. / 255)

    self.train_generated_images = self.__generate_images(
        'train', augmenting_generator)
    self.test_validation_generated_images = self.__generate_images(
        'test', plain_generator)

    test_root = path.join(self.folder, 'test')
    images, labels = [], []
    for class_index, class_name in enumerate(('NORMAL', 'PNEUMONIA')):
        class_dir = path.join(test_root, class_name)
        for file_name in os.listdir(class_dir):
            pixels = plt.imread(path.join(class_dir, file_name))
            pixels = cv2.resize(
                pixels, (self.image_dimension, self.image_dimension))
            # Repeat the plane three times along the depth axis.
            # NOTE(review): presumably the inputs are single-channel - confirm.
            stacked = np.dstack((pixels, pixels, pixels))
            images.append(stacked.astype('float32') / 255)
            labels.append(class_index)

    self.test_images = np.array(images)
    self.test_labels = np.array(labels)
def calculate_matrix(img_ds):
    """Return the patient-space x and y coordinates of the pixel grid.

    Implements the first two rows of DICOM equation C.7.6.2.1-1 for the two
    image axes separately: the x coordinate of every column along the first
    row (``j = 0``) and the y coordinate of every row along the first column
    (``i = 0``).

    :param img_ds: Dataset-like object exposing ``PixelSpacing``,
        ``ImageOrientationPatient``, ``ImagePositionPatient``, ``Columns``
        and ``Rows``.
    :return: Tuple ``(x, y)`` of 1-D float arrays with ``Columns`` and
        ``Rows`` entries respectively.
    """
    # PixelSpacing is (row spacing, column spacing): the first value is the
    # distance between adjacent rows, the second the distance between
    # adjacent columns, both in mm.
    row_spacing = float(img_ds.PixelSpacing[0])
    col_spacing = float(img_ds.PixelSpacing[1])

    # Direction cosines [Xx, Xy, Xz, Yx, Yy, Yz] of the first row and the
    # first column with respect to the patient.
    orientation = [float(v) for v in img_ds.ImageOrientationPatient]

    # [Sx, Sy, Sz]: centre of the first transmitted voxel, in mm.
    position = [float(v) for v in img_ds.ImagePositionPatient]

    column_index = np.arange(img_ds.Columns, dtype=float)
    row_index = np.arange(img_ds.Rows, dtype=float)

    # Stepping along the column index i moves by the *column* spacing in the
    # row direction X; stepping along the row index j moves by the *row*
    # spacing in the column direction Y. Pairing them the other way round
    # gives wrong coordinates whenever the pixels are not square.
    x = position[0] + orientation[0] * col_spacing * column_index
    y = position[1] + orientation[4] * row_spacing * row_index
    return x, y
def pred_input(str_input, path_hyper_parameter=path_hyper_parameters):
    """Predict on a single input string and print the results.

    Prints, in order: the raw prediction, the input string, the decoded
    (label, score) list and the subset of that list whose score is at
    least 0.73.

    :param str_input: Text to classify.
    :param path_hyper_parameter: Path of the JSON file holding the
        hyper-parameters used to build the graph.
    """
    # Load the hyper-parameters.
    hyper_parameters = load_json(path_hyper_parameter)
    pt = PreprocessTextMulti()

    # Initialise the model and load its weights.
    graph = Graph(hyper_parameters)
    graph.load_model()
    embedder = graph.word_embedding

    # Text -> token indices.
    encoded = embedder.sentence2idx(str_input)
    if hyper_parameters['embedding_type'] == 'bert':
        # The first two elements of the encoding are fed as separate inputs.
        x_val = [np.array([encoded[0]]), np.array([encoded[1]])]
    else:
        x_val = encoded

    # Predict.
    pred = graph.predict(x_val)
    print(pred)

    # Map indices back to labels paired with their scores.
    pre = pt.prereocess_idx(pred[0])
    above_threshold = [entry for entry in pre[0] if entry[1] >= 0.73]

    print(str_input)
    print(pre[0])
    print(above_threshold)
def address_types():
    """Plot, per node group, the share of IP-only, onion-only and mixed nodes.

    For each of the three node tables the function counts the rows that
    advertise only IP addresses, only Tor addresses, or both, converts the
    counts to percentages of their per-table sum, prints the resulting
    table and saves a stacked bar chart as ``address_types.png``.
    """
    tables = ('nodes_1_channel', 'nodes_5_10_channels', 'nodes_best_connected')
    filters = (
        'total_ip > 0 and total_addresses=total_ip',
        'total_tor > 0 and total_addresses=total_tor',
        'total_tor > 0 and total_ip > 0',
    )

    # One row of counts per filter, one column per table; queries are issued
    # filter by filter, table by table.
    counts = []
    for condition in filters:
        row = []
        for table in tables:
            query = "select count(*) from {} where {}".format(table, condition)
            row.append(session.execute(query).scalar())
        counts.append(row)
    ip_counts, tor_counts, both_counts = counts

    total = [
        ip + tor + both
        for ip, tor, both in zip(ip_counts, tor_counts, both_counts)
    ]
    proportions = [
        np.true_divide(np.array(group), total) * 100
        for group in (ip_counts, tor_counts, both_counts)
    ]

    df = DataFrame(proportions).transpose()
    df.index = np.array([
        'Nodes with 1 channel',
        'Nodes with 5-10 channels',
        'Nodes with higest number\n of channels in the network',
    ])
    df.columns = [
        'IP addresses only',
        'Onion addresses only',
        'Both kinds of addresses',
    ]
    print(df)

    plot = df.plot(kind='bar', stacked=True)
    save_plot(plot, 'address_types.png', tilt_x_labels=True)
def calDertaGd(x, y, dataYi, vh, wh, gamaH, thetaJ, yita):
    """Compute the four update terms derived from ``gi``, ``bh`` and ``eh``.

    ``gi``, ``bh`` and ``eh`` come from ``calGi``, ``calBh`` and ``calEh``.
    Every term is scaled by ``yita`` (presumably the learning rate - confirm
    against the caller). ``thetaJ`` is accepted but not used here.

    Returns a 4-tuple:
        * ``yita * gi * bh`` wrapped in an array and transposed,
        * ``-yita * gi``,
        * ``yita`` times the product of ``x`` as a column and ``eh`` as a row,
        * ``-yita * eh`` wrapped in an array.
    """
    output_term = calGi(y, dataYi)
    hidden_out = calBh(x, vh, gamaH)
    hidden_term = calEh(wh, output_term, hidden_out)

    x_column = np.array([x]).T
    eh_row = np.array([hidden_term])

    delta_w = np.array([yita * output_term * hidden_out]).T
    delta_theta = -yita * output_term
    delta_v = yita * np.dot(x_column, eh_row)
    delta_gamma = np.array([-yita * hidden_term])
    return delta_w, delta_theta, delta_v, delta_gamma
def calculate_kdj(self):
    """Run ``talib.STOCH`` on the stored high/low/close series.

    The two series returned by ``talib.STOCH`` are stored in
    ``self._slowk_list`` and ``self._slowd_list``.
    """
    highs = np.array(self._high_list)
    lows = np.array(self._low_list)
    closes = np.array(self._close_list)
    stoch_settings = dict(
        fastk_period=9,
        slowk_period=3,
        slowk_matype=0,
        slowd_period=3,
        slowd_matype=0,
    )
    slowk, slowd = talib.STOCH(highs, lows, closes, **stoch_settings)
    self._slowk_list = slowk
    self._slowd_list = slowd
def artificialTest():
    """Benchmark feature-selection configurations on every configured dataset.

    For each dataset returned by ``ut.constructDatasetNames`` the model is
    first judged on all feature columns, then once per combination of
    ``minRed``, ``binMethod``, ``cutMethod`` and ``measure`` on the columns
    picked by ``fs.featureSelection``. Every result is printed, followed by
    the best configuration for that dataset.

    With ``modelType == 0`` the score is printed as a percentage and higher
    is better; otherwise it is printed as an error (``e:``) and lower is
    better.
    """
    dataType = 1  # 0 synthetic, 1 real
    modelType = 1  # 0 classification, 1 regression
    dataPath = "data/"
    dataSets = ut.constructDatasetNames(dataType, modelType, dataPath)

    for f in dataSets:
        # Start at 0 when maximising, at a large sentinel when minimising.
        maxAcc = 1000000 * modelType
        # Reset per dataset so one file's winner is never reported for the
        # next file.
        best = None

        data = read_csv(f)
        # The last column is the target, everything before it is a feature.
        X_all = np.array(data.iloc[:, 0:-1])
        y = np.array(data.iloc[:, -1])
        print(f)

        startTime = time.time()
        acc = ml.modelJudge(X=X_all, y=y, modelType=modelType,
                            testPerc=0.4, runs=3)
        endTime = time.time()
        elapsed = str(round(endTime - startTime, 3)) + "s"
        if modelType == 0:
            print("original:", str(acc * 100) + "%",
                  "#" + str(X_all.shape[1]), "n:" + str(X_all.shape[0]),
                  elapsed)
        else:
            print("original:", "e: " + str(acc),
                  "#" + str(X_all.shape[1]), "n:" + str(X_all.shape[0]),
                  elapsed)

        for minRed in [0, 1]:
            for binMethod in [0]:
                for cutMethod in [3]:
                    for measure in [0, 1, 2, 3, 4]:
                        startTime = time.time()
                        rank = fs.featureSelection(
                            X=X_all, y=y, modelType=modelType, runs=3,
                            processes=0, measure=measure,
                            binMethod=binMethod, cutMethod=cutMethod,
                            minRed=minRed, rrThreshold=0.9, debug=False)
                        endTime = time.time()
                        timefs = round(endTime - startTime, 3)

                        # NOTE(review): assumes `rank` holds integer column
                        # positions - confirm against fs.featureSelection.
                        X_selected = np.array(data.iloc[:, rank])
                        startTime = time.time()
                        acc = ml.modelJudge(X=X_selected, y=y,
                                            modelType=modelType,
                                            testPerc=0.4, runs=3)
                        endTime = time.time()
                        timecf = round(endTime - startTime, 3)

                        if modelType == 0:
                            print("[", minRed, binMethod, cutMethod, measure,
                                  "]", str(acc * 100) + "%",
                                  str(timefs) + "s", str(timecf) + "s",
                                  "#" + str(len(rank)), rank[0:10])
                            bestRun = acc > maxAcc
                        else:
                            print("[", minRed, binMethod, cutMethod, measure,
                                  "]", "e: " + str(acc),
                                  str(timefs) + "s", str(timecf) + "s",
                                  "#" + str(len(rank)), rank[0:10])
                            bestRun = acc < maxAcc

                        if bestRun:
                            maxAcc = acc
                            best = (
                                [minRed, binMethod, cutMethod, measure],
                                rank, timefs, timecf,
                            )

        if best is None:
            # No run beat the starting value.
            print("best:", None)
            continue

        configuration, maxRank, maxTimefs, maxTimecf = best
        if modelType == 0:
            print("best:", configuration, str(maxAcc * 100) + "%",
                  str(maxTimefs) + "s", str(maxTimecf) + "s",
                  "#" + str(len(maxRank)), maxRank[0:10])
        else:
            print("best:", configuration, "e: " + str(maxAcc),
                  str(maxTimefs) + "s", str(maxTimecf) + "s",
                  "#" + str(len(maxRank)), maxRank[0:10])
def evaluate_model():
    """Evaluate the item2vec model on the evaluation sequences.

    The sequences are read from
    ``evaluation_data/evaluation_sequences.pickle`` when that file exists.
    Otherwise they are built from the source evaluation dataframe:

    1. drop the ``user_log_list`` column,
    2. pass the dataframe through ``remove_catalogs``,
    3. copy ``item_sequence`` into a new ``decoded_item_sequence`` column,
    4. drop ``catalog_item_list``, ``session_start_time`` and
       ``good_catalog_items``,
    5. convert the remaining values to an array and save it to the pickle
       path above.

    Finally ``evaluation_creator`` is called with the model and sequences.
    """
    item2vec_model = load_i2v_model('../utils/item2vec.model')
    cache_path = 'evaluation_data/evaluation_sequences.pickle'

    if os.path.exists(cache_path):
        sequences = joblib.load(cache_path)
    else:
        frame = joblib.load(
            '../source_data/common_data/evaluation_dataframe.pickle')
        frame.drop(['user_log_list'], axis=1, inplace=True)
        frame = remove_catalogs(dataframe=frame)
        frame['decoded_item_sequence'] = frame['item_sequence'].copy(deep=True)
        frame.drop(
            ['catalog_item_list', 'session_start_time', 'good_catalog_items'],
            axis=1, inplace=True)
        sequences = np.array(frame.values.tolist())
        joblib.dump(sequences, cache_path)

    evaluation_creator(model=item2vec_model,
                       model_name='base_item2vec_model',
                       sequence_to_evaluate=sequences)
def __init__(self, trajectoryID, points):
    """Store the trajectory id, its coordinates and their bounding box.

    :param trajectoryID: Identifier stored as ``self.trajectoryID``.
    :param points: Iterable of objects exposing ``x`` and ``y``.

    ``self.xs`` / ``self.ys`` receive the coordinates in input order and
    ``self.max_x``, ``self.min_x``, ``self.max_y``, ``self.min_y`` their
    extrema. When ``points`` is empty the four bounds stay at 0 instead of
    failing on an empty reduction.
    """
    self.trajectoryID = trajectoryID
    self.max_x = 0
    self.max_y = 0
    self.min_x = 0
    self.min_y = 0
    self.xs = []
    self.ys = []
    for el in points:
        self.xs.append(el.x)
        self.ys.append(el.y)

    # Compute the extrema once, after all points are collected; skip the
    # reduction entirely when there is nothing to reduce.
    if self.xs:
        xs = np.array(self.xs)
        ys = np.array(self.ys)
        self.max_x = np.amax(xs)
        self.min_x = np.amin(xs)
        self.max_y = np.amax(ys)
        self.min_y = np.amin(ys)
def Logaritmik(self):
    """Apply a logarithmic intensity transform to ``self.image``.

    Does nothing when ``self.image`` is ``None``. Otherwise the user is
    prompted for an integer scale, the image is mapped through
    ``scale * log10(1 + pixel) / log10(1 + max_pixel)``, the floating-point
    result is kept in ``self.log_transformed``, the ``uint8`` version
    replaces ``self.image`` and ``self.processedImg()`` is called.

    Raises:
        ValueError: If the entered scale is not an integer.
    """
    if self.image is None:
        return

    log_degeri = int(input("Logaritmik değeri giriniz: "))

    # Work in float64: with uint8 pixels `1 + image` wraps 255 around to 0,
    # which turns the brightest pixels into log10(0) = -inf.
    pixels = np.asarray(self.image, dtype=np.float64)
    denominator = np.log10(1 + np.max(pixels))

    if denominator > 0:
        c = log_degeri / denominator
        self.log_transformed = c * np.log10(1 + pixels)
    else:
        # All-zero image: log10(1) == 0 would divide by zero; the transform
        # of an all-zero image is all zeros.
        self.log_transformed = np.zeros_like(pixels)

    # Saturate rather than wrap when the scale pushes values outside the
    # uint8 range.
    self.image = np.array(np.clip(self.log_transformed, 0, 255),
                          dtype=np.uint8)
    self.processedImg()
def run(self) -> None:
    """Print recent index prices, then hand control to the order manager.

    Fetches the 240 most recent ``.BXBT`` trades from the BitMEX REST API
    and prints their prices in the order the API returns them. Afterwards
    ``self.bitmex.ws.recent_trades()`` is called and
    ``OrderManager.run_loop()`` is started.

    Raises:
        requests.exceptions.RequestException: If the price request fails or
            the server does not respond within the timeout.
    """
    number = 240
    link = "https://www.bitmex.com/api/v1/trade?symbol=.BXBT&count=" + \
        str(number) + "&columns=price&reverse=true"
    # Without a timeout a stalled connection would block start-up forever.
    response = requests.get(link, timeout=30)
    prices = [trade['price'] for trade in response.json()]
    print(prices)

    order_manager = OrderManager()
    # Try/except just keeps ctrl-c from printing an ugly stacktrace
    try:
        self.bitmex.ws.recent_trades()
        order_manager.run_loop()
    except (KeyboardInterrupt, SystemExit):
        sys.exit()
def view_image(image_arr, xs=None):
    """Show an image as 96x96 greyscale with optional marker circles.

    :param image_arr: Pixel values; converted to ``uint8`` and resized in
        place to ``(96, 96)``.
    :param xs: Optional flat sequence ``[x0, y0, x1, y1, ...]`` of marker
        coordinates. A trailing unpaired value is ignored. ``None`` (the
        default) draws no markers.
    """
    arr = np.array(image_arr, dtype=np.uint8)
    arr.resize((96, 96))
    plt.imshow(arr, cmap='gray')

    # The default is None, which has no len(); treat it as "no markers".
    points = [] if xs is None else xs
    for i in range(0, len(points) - 1, 2):
        plt.scatter(points[i], points[i + 1], s=200,
                    facecolors='none', edgecolors='r')
    plt.show()
def calculate_stochrsi(self):
    """Run ``talib.STOCHRSI`` on the stored close series.

    The two series returned by ``talib.STOCHRSI`` are stored in
    ``self._fastk_list`` and ``self._fastd_list``.
    """
    closes = np.array(self._close_list)
    fastk, fastd = talib.STOCHRSI(
        closes,
        timeperiod=14,
        fastk_period=3,
        fastd_period=3,
        fastd_matype=0,
    )
    self._fastk_list = fastk
    self._fastd_list = fastd
def draw(name, p, clf, X, y, step, ):
    """Plot a classifier's decision regions over the 2-D samples.

    The plane is sampled on a grid of pitch ``step`` that extends one step
    beyond the data on every side; each grid point is classified with
    ``clf.predict`` and the result is drawn as a two-colour mesh. Samples
    with label -1 are drawn red, label 1 blue, and the samples returned by
    ``clf.get_non_bound_indices()`` are marked with white crosses. The
    figure is saved as ``<name>_<p['name']>.png`` and then shown.
    """
    low_x, low_y = np.amin(X, 0)
    high_x, high_y = np.amax(X, 0)
    # Pad the bounding box by one step on each side.
    low_x, high_x = low_x - step, high_x + step
    low_y, high_y = low_y - step, high_y + step

    grid_x, grid_y = np.meshgrid(np.arange(low_x, high_x, step),
                                 np.arange(low_y, high_y, step))
    grid_points = np.c_[grid_x.ravel(), grid_y.ravel()]
    # Classify one grid point at a time.
    predicted = np.apply_along_axis(clf.predict, 1, grid_points)
    predicted = np.array(predicted).reshape(grid_x.shape)

    plt.figure(figsize=(10, 10))
    plt.xlim(low_x, high_x)
    plt.ylim(low_y, high_y)

    neg_x, neg_y = X[y == -1].T
    pos_x, pos_y = X[y == 1].T
    plt.pcolormesh(grid_x, grid_y, predicted,
                   cmap=ListedColormap(['#FFAAAA', '#AAAAFF']))
    plt.scatter(neg_x, neg_y, color='red', s=100)
    plt.scatter(pos_x, pos_y, color='blue', s=100)

    marked_x, marked_y = X[clf.get_non_bound_indices()].T
    plt.scatter(marked_x, marked_y, color='white', marker='x', s=60)

    plt.suptitle(p)
    plt.savefig(name + '_' + p['name'] + '.png')
    plt.show()
def Gamma(self):
    """Apply gamma correction to ``self.image``.

    Does nothing when ``self.image`` is ``None``. Otherwise the user is
    prompted for a float exponent, the image is scaled to [0, 1], raised to
    that power, scaled back by 255 and stored as ``uint8``; afterwards
    ``self.processedImg()`` is called.
    """
    if self.image is None:
        return

    gamma = float(input("Gamma: "))
    normalised = self.image / 255
    self.image = np.array(255 * normalised**gamma, dtype='uint8')
    self.processedImg()
def find_missing_seat(seats_data: List[Tuple[int, int]]) -> Tuple[int, int]:
    """
    Locate the free seat whose two adjacent seats are both occupied.

    Free seats are examined in row-major order and the first match wins.
    Nothing is returned explicitly (i.e. ``None``) when no seat qualifies.

    :param seats_data: (row, column) coordinates of the occupied seats
    :return: (row, column) of the matching free seat
    """
    # Occupancy grid: 1 = taken, 0 = free.
    taken = np.array(seats_data)
    occupancy = np.zeros((PLANE_ROW_NUMBER, PLANE_COLUMN_NUMBER), dtype=int)
    occupancy[taken[:, 0], taken[:, 1]] = 1

    free_rows, free_cols = np.where(occupancy == 0)
    for candidate in zip(free_rows, free_cols):
        before_seat, after_seat = get_adjacent_seats(*candidate)
        if before_seat is None or after_seat is None:
            continue
        before_taken = occupancy[before_seat[0]][before_seat[1]]
        if before_taken and occupancy[after_seat[0]][after_seat[1]]:
            return candidate
def mrmrTest(cutMethod=1, method=0, runs=3):
    """Judge each dataset on all features, then on a cut feature ranking.

    For every configured file the model is first judged on all feature
    columns. A ranking is then obtained from ``rankExtraction``, truncated
    at the cut position chosen by ``cutMethod`` and the model is judged
    again on the surviving columns. Scores and timings are printed.

    :param cutMethod: 0 uses ``cuts.greatestDiffCut`` on the feature
        importances, 1 uses ``cuts.monotonicValidationCut``.
    :param method: Forwarded to ``rankExtraction``.
    :param runs: Number of runs forwarded to the judges and the cut.
    :raises ValueError: If ``cutMethod`` is neither 0 nor 1.
    """
    if cutMethod not in (0, 1):
        # Fail before any work is done instead of hitting an unbound
        # cut position half-way through the first dataset.
        raise ValueError("cutMethod must be 0 or 1, got %r" % (cutMethod,))

    # Artificial datasets
    files = ['data1000-f1.csv', 'data1000-f2.csv', 'data1000-f3.csv',
             'data1000-f4.csv', 'data5000-f1.csv', 'data5000-f2.csv',
             'data5000-f3.csv', 'data5000-f4.csv', 'data20000-f1.csv',
             'data20000-f2.csv', 'data20000-f3.csv', 'data20000-f4.csv',
             'data1000-f1-r500.csv', 'data5000-f1-r500.csv',
             'data20000-f1-r500.csv']
    buenos = [[0, 1, 2, 3, 4, 5, 6, 13, 14], [0, 1, 8, 9], [0, 1, 6, 7],
              [0, 1, 3, 2], [0, 1, 2, 3, 4, 5, 6, 13, 14], [0, 1, 8, 9],
              [0, 1, 6, 7], [0, 1, 3, 2], [0, 1, 2, 3, 4, 5, 6, 13, 14],
              [0, 1, 8, 9], [0, 1, 6, 7], [0, 1, 3, 2],
              [0, 1, 2, 3, 4, 5, 6, 13, 14], [0, 1, 2, 3, 4, 5, 6, 13, 14],
              [0, 1, 2, 3, 4, 5, 6, 13, 14]]
    modelsType = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
    # Real datasets
    # files = ['real/sonar_scale.csv', 'real/splice_scale.csv', 'real/colon-cancer.csv', 'real/leu.csv', 'real/duke.csv', 'real/BH20000.csv', 'real/madelon-test.csv']
    # buenos = [['?'],['?'],['?'],['?'],['?'],['?'],['?']]
    # modelsType = [0,0,0,0,0,0,0]

    def judge(X, y, modelType):
        # modelType 0 is judged as classification, anything else as regression.
        if modelType == 0:
            return ml.clasificationJudge(X=X, y=y, testPerc=0.5, runs=runs)
        return ml.regresionJudge(X=X, y=y, testPerc=0.5, runs=runs)

    # Walk the three lists in lockstep so each file is paired with its own
    # expected features and model type.
    for f, expected, modelType in zip(files, buenos, modelsType):
        filepath = 'Data/' + f
        filepath2 = 'Data2/' + f
        data = read_csv(filepath)
        # The last column is the target, everything before it is a feature.
        X = np.array(data.iloc[:, 0:-1])
        y = np.array(data.iloc[:, -1])
        print(filepath, expected)

        startTime = time.time()
        acc = judge(X, y, modelType)
        endTime = time.time()
        print("original:", acc, X.shape[1],
              str(round(endTime - startTime, 3)) + "s")

        startTime = time.time()
        rank, featureImportance = rankExtraction(filepath2, method)
        if cutMethod == 0:
            cutpos = cuts.greatestDiffCut(weights=featureImportance)
        else:
            cutpos = cuts.monotonicValidationCut(
                X=X, y=y, modelType=modelType, rank=rank,
                consecutives=5, runs=runs)
        rank = rank[0:cutpos]
        endTime = time.time()
        timefs = round(endTime - startTime, 3)

        # NOTE(review): assumes `rank` holds integer column positions -
        # confirm against rankExtraction.
        X = np.array(data.iloc[:, rank])
        startTime = time.time()
        acc = judge(X, y, modelType)
        endTime = time.time()
        timeml = round(endTime - startTime, 3)

        print("result: ", acc, timefs, timeml, len(rank), rank[0:5])
        print()
def __getitem__(self, item):
    """Return the (noisy, clean) pair stored at index ``item``.

    Both entries of ``self.data[item]`` are loaded with ``self.loader`` and
    passed through ``self.transform``; the noisy one is additionally
    median-blurred with a kernel size of 3 before the transform. Each
    result gets a new leading dimension via ``unsqueeze(0)``.
    """
    noisy_source, clean_source = self.data[item]

    noisy_pixels = np.array(self.loader(noisy_source))
    denoised = cv2.medianBlur(noisy_pixels, 3)
    noisy_tensor = self.transform(denoised).unsqueeze(0)

    clean_tensor = self.transform(self.loader(clean_source)).unsqueeze(0)
    return noisy_tensor, clean_tensor
def plot_matches(img1, kp1, img2, kp2, top_matches, inliers, base_data_path, image_i, image_j, title_append=""):
    """Draw the inlier matches between two images and save the figure.

    Only the entries of ``top_matches`` selected by ``inliers`` are drawn.
    The title reports how many entries of ``inliers`` are truthy, followed
    by ``title_append``. The figure is written to
    ``../output/feature_match/path_output_1/<image_i>.<image_j>.png`` and
    closed. ``base_data_path`` is accepted but not used.
    """
    plt.figure(figsize=(10, 5))

    kept_matches = np.array(top_matches)[inliers].tolist()
    canvas = cv2.drawMatches(img1, kp1, img2, kp2, kept_matches, None,
                             flags=2)
    plt.imshow(canvas)

    inlier_count = sum(1 for flag in inliers if flag)
    plt.title(" # inliers: " + str(inlier_count) + " " + title_append)

    target = ("../output/feature_match/path_output_1/"
              + str(image_i) + "." + str(image_j) + ".png")
    plt.savefig(target)
    plt.close()
def TrainingImages():
    """Train the LBPH face recognizer and save it.

    Faces and ids come from ``getImagesAndLabels``; the trained recognizer
    is written to ``TrainingImageLabel/Trainner.yml`` and the ``notf``
    widget is updated with a completion message.
    """
    lbph = cv2.face_LBPHFaceRecognizer.create()
    # The cascade classifier is constructed but not used further in this
    # function.
    cascade_file = "haarcascade_frontalface_default.xml"
    detector = cv2.CascadeClassifier(cascade_file)

    face_samples, face_ids = getImagesAndLabels("Trainner.yml")
    lbph.train(face_samples, np.array(face_ids))
    lbph.save("TrainingImageLabel/Trainner.yml")

    notf.configure(text="Image Trained")
def plot_confusion_matrix(cm, labels_name, title):
    """Draw a row-normalised confusion matrix on the current figure.

    Each row of ``cm`` is divided by its own sum before plotting. Both axes
    are ticked with ``labels_name`` (x labels rotated by 90 degrees); the
    y axis is labelled as the true label, the x axis as the predicted one.
    """
    row_totals = cm.sum(axis=1)[:, np.newaxis]
    normalised = cm.astype('float') / row_totals

    plt.imshow(normalised, interpolation='nearest')
    plt.title(title)
    plt.colorbar()

    tick_positions = np.arange(len(labels_name))
    plt.xticks(tick_positions, labels_name, rotation=90)
    plt.yticks(tick_positions, labels_name)
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
def img_stack_displacement(orientation, position):
    """
    Project the image position onto the axis perpendicular to the image.

    The stack axis is the cross product of the two direction vectors held
    in ``orientation``; the result is the dot product of that axis with
    ``position``. It is meant to serve as a sort key that orders the slices
    of a stack regardless of how the images are oriented or in which order
    they were read.

    :param orientation: Six values (numbers or numeric strings): the first
        three form one direction vector, the last three the other.
    :param position: Three values (numbers or numeric strings) giving the
        image position.
    :return: Float, the position's component along the stack axis.
    """
    first_direction = np.array([float(v) for v in orientation[0:3]])
    second_direction = np.array([float(v) for v in orientation[3:6]])
    stack_axis = np.cross(first_direction, second_direction)

    origin = np.array([float(v) for v in position])
    return stack_axis.dot(origin)
def printst(step, seg_source,seg_ed,seg_es,labeled,labeles):
    """Save the source segmentation and print the ED and ES dice results.

    Channels 1, 2 and 3 of the first batch element are merged into a single
    volume weighted by channel number. The merged ``seg_source`` is written
    to ``./state/seg_source<step>.nii`` with unit spacing. The merged
    ``seg_ed`` / ``seg_es`` predictions are thresholded and compared with
    the merged ``labeled`` / ``labeles`` references using ``dice``; each
    result is printed together with the step number.
    """
    def merge_channels(volume):
        # Weight channel k by k and add the three channels up.
        return (volume[0, 1, :, :, :] * 1
                + volume[0, 2, :, :, :] * 2
                + volume[0, 3, :, :, :] * 3)

    def report(tag, prediction, reference):
        merged_prediction = merge_channels(prediction)
        merged_reference = merge_channels(reference)
        predicted = threshold(merged_prediction.data.cpu().numpy())
        expected = merged_reference.data.cpu().numpy()
        scores = np.array([dice(predicted, expected, nargout=1)])
        dice_sum = np.sum(scores, axis=0)
        print('step' + str(step) + tag + str(dice_sum))

    source_volume = merge_channels(seg_source).data.cpu().numpy()
    out = sitk.GetImageFromArray(source_volume)
    out.SetSpacing((1, 1, 1))
    sitk.WriteImage(out, './state/seg_source' + str(step) + '.nii')

    report('diceresult_ed:|', seg_ed, labeled)
    report('diceresult_es:|', seg_es, labeles)
def openFile(self):
    """Let the user pick an image file and load it into ``self.image``.

    Shows the main window and opens a file dialog. If a file is chosen its
    bytes are decoded with ``cv.imdecode`` (``IMREAD_UNCHANGED``), the
    result is passed through ``self.olceklendir`` and ``self.openImage()``
    is called. Cancelling the dialog leaves the instance untouched.
    """
    MainWindow.show()
    fname, _ = QFileDialog.getOpenFileName(
        None,
        'Fotograf Seciniz',
        '.',
        'Image Files (*.png *.jpg *.jpeg *.bmp )',
    )
    if not fname:
        return

    with open(fname, "rb") as file:
        raw_bytes = bytearray(file.read())
    self.image = cv.imdecode(np.array(raw_bytes), cv.IMREAD_UNCHANGED)
    self.image = self.olceklendir(self.image)
    self.openImage()
def server():
    """Run the model on the posted queries and return the predictions.

    Expects a JSON object with a ``"queries"`` key holding a
    two-dimensional array. Responds with ``{"result": [...]}`` on success.
    A missing, malformed or non-object body, or one without ``"queries"``,
    yields HTTP 400 with a JSON error object.
    """
    # silent=True returns None instead of raising for a missing or
    # malformed JSON body, so every bad body takes the same 400 path below.
    body = request.get_json(silent=True)
    if not isinstance(body, dict) or "queries" not in body:
        # The payload must be real JSON (double quotes) because it is sent
        # with the application/json mimetype.
        return Response('{"err": "bad request body"}', status=400,
                        mimetype='application/json')

    # "queries" should be two dimensional array
    requested_query = np.array(body["queries"])
    # TODO: Make it thread safe
    response = bst.predict(requested_query)
    return jsonify({"result": response.tolist()})
def getImagesAndLabels(path):
    """Load every image in ``path`` as greyscale together with its id.

    The id is the integer between the first and second dot of the file
    name.

    :param path: Directory containing the image files.
    :return: Tuple ``(faces, Ids)``: a list of ``uint8`` arrays and the
        list of integer ids, in directory-listing order.
    :raises IndexError: If a file name contains no dot.
    :raises ValueError: If the id part of a file name is not an integer.
    """
    faces = []
    Ids = []
    for file_name in os.listdir(path):
        imagePath = os.path.join(path, file_name)
        # Close the file as soon as the pixels are copied out, so a large
        # directory does not accumulate open handles.
        with Image.open(imagePath) as pilImage:
            imageNp = np.array(pilImage.convert('L'), 'uint8')
        Id = int(os.path.split(imagePath)[-1].split(".")[1])
        faces.append(imageNp)
        Ids.append(Id)
    return faces, Ids
def run(self):
    """Compute the mean MSD per trajectory for every result folder.

    For each experiment folder under ``self.path`` and each of its
    sub-folders, the ``trajectory-generate-aSs-*.zip`` files are processed
    in natural order. For every such file the ``MSD`` values of its
    individuals are averaged per trajectory (trajectories are read from
    ``trajectory.zip``), giving one ``{trajectory index: mean}`` dict per
    file. The list of dicts is handed to ``self.print_graph`` once per
    sub-folder.
    """
    for experiemnt in how_many_fatherFolder(self.path):
        logging.debug("Folder under analysis -> " + str(experiemnt))
        second_path = self.path + experiemnt + "/"
        sub_folders = how_many_folder(second_path)
        logging.debug("Folder to analise -> " + str(len(sub_folders)))

        for el in sub_folders:
            path_here = second_path + str(el) + "/"
            names = sorted_nicely([
                entry for entry in os.listdir(path_here)
                if os.path.isfile(os.path.join(path_here, entry))
                and 'trajectory-generate-aSs-' in entry
                and ".zip" in entry
            ])

            # Register one population per trajectory found in the archive.
            pops = Populations()
            for tra in self.read_trajectory_info(path_here + "trajectory.zip"):
                pops.add_population(Population(tra))
            number_of_trajectories = pops.get_number_trajectories()

            total_distances = []
            logging.debug("Analysing Trajectories...")
            for name in tqdm.tqdm(names):
                individuals = self.read_info(path_here + name)

                # Mean MSD of the individuals belonging to each trajectory.
                distance_per_trajectories = {}
                for j in range(number_of_trajectories):
                    array = np.array([
                        indiv.MSD for indiv in individuals
                        if indiv.trajectoryID
                        == pops.get_population(j).tra.trajectoryID
                    ])
                    distance_per_trajectories[j] = np.sum(array) / len(array)
                total_distances.append(distance_per_trajectories)

            self.print_graph(total_distances, path_here)
def evaluteDataset(filepath, modelType=2, measure=1, cutMethod=1, minRed=0, comporative=True):
    """Run feature selection on one CSV dataset and print the judged score.

    The last CSV column is the target; all others are features. The model
    is judged on the columns picked by ``fs.featureSelection`` and, when
    ``comporative`` is true, also on all features first for comparison.

    :param filepath: Path of the CSV file to load.
    :param modelType: Model type forwarded to the judge and the selection;
        values outside ``0 <= modelType < 2`` are replaced by
        ``ut.datesetType(y)``.
    :param measure: Forwarded to ``fs.featureSelection``.
    :param cutMethod: Forwarded to ``fs.featureSelection``.
    :param minRed: Forwarded to ``fs.featureSelection``.
    :param comporative: Also judge and print the all-features baseline.
    """
    data = read_csv(filepath)
    X = np.array(data.iloc[:, 0:-1])
    y = np.array(data.iloc[:, -1])

    if modelType >= 2 or modelType < 0:
        modelType = ut.datesetType(y)

    if comporative:
        startTime = time.time()
        acc = ml.modelJudge(X=X, y=y, modelType=modelType,
                            testPerc=0.4, runs=3)
        endTime = time.time()
        print("original:", acc, X.shape[1],
              str(round(endTime - startTime, 3)) + "s")

    startTime = time.time()
    rank = fs.featureSelection(X=X, y=y, modelType=modelType, runs=3,
                               processes=0, measure=measure, binMethod=0,
                               cutMethod=cutMethod, minRed=minRed,
                               rrThreshold=0.9, debug=False)
    endTime = time.time()
    timefs = round(endTime - startTime, 3)

    # NOTE(review): assumes `rank` holds integer column positions - confirm
    # against fs.featureSelection.
    X = np.array(data.iloc[:, rank])
    startTime = time.time()
    acc = ml.modelJudge(X=X, y=y, modelType=modelType, testPerc=0.4, runs=3)
    endTime = time.time()
    timecf = round(endTime - startTime, 3)

    print("result:", acc, str(timefs) + "s", str(timecf) + "s",
          len(rank), rank)
def readData():
    """Load the ex4 data and prepare shuffled 5-fold split indices.

    Columns 3 and 6 of ``ex4x.dat`` become the two features and column 3 of
    ``ex4y.dat`` the integer label (the files are parsed with a single
    space as separator). A constant 1.0 is prepended to every sample.

    Returns:
        ``(dataX, dataY, trainIndex, testIndex)`` where ``dataX`` has one
        row per sample (``[1.0, x1, x2]``), ``dataY`` holds the labels and
        the two index lists hold the train / test indices of each fold.
    """
    data_x = pd.read_table("../data/ex4x.dat", sep=' ').values
    data_y = pd.read_table("../data/ex4y.dat", sep=' ').values

    sample_count = len(data_y[:, 3])
    bias = [1.0] * sample_count
    feature_a = [float(data_x[i, 3]) for i in range(sample_count)]
    feature_b = [float(data_x[i, 6]) for i in range(sample_count)]
    labels = [int(data_y[i, 3]) for i in range(sample_count)]

    dataX = np.array([bias, feature_a, feature_b]).T
    dataY = np.array(labels).T

    folds = list(KFold(n_splits=5, shuffle=True).split(X=dataX))
    trainIndex = [train_part for train_part, _ in folds]
    testIndex = [test_part for _, test_part in folds]
    return dataX, dataY, trainIndex, testIndex
def manhattan_distance(self, values: str, df):
    """Return the Manhattan distance from ``values`` to every row of ``df``.

    The last column of ``df`` is excluded from the distance.

    :param values: Whitespace-separated numbers, one per compared column.
    :param df: DataFrame whose index labels are ``0 .. len(df) - 1``; rows
        are looked up by those labels.
    :return: Dict mapping each row label to its distance.
    :raises ValueError: If ``values`` contains a non-numeric token.
    :raises IndexError: If ``values`` has fewer numbers than there are
        compared columns.
    """
    # The `np.float` alias was removed from NumPy; the builtin `float` is
    # the same dtype. split() without an argument also tolerates repeated
    # or trailing whitespace.
    values_array = np.array(values.split()).astype(float)
    columns = df.columns
    compared = len(columns) - 1  # every column except the last

    distance = dict()
    for j in range(len(df.index)):
        distance[j] = sum(
            math.fabs(values_array[n] - df.at[j, columns[n]])
            for n in range(compared)
        )
    return distance
def osm_xml_download(x, y, d=0.025, verbose=False, filename=None, path=''):
    """Download the OpenStreetMap XML for a square box centred on (x, y).

    The bounding box spans ``d`` on each side of the centre; its four
    values are rounded to 4 decimals before being sent.

    :param x: First coordinate of the centre (first and third bbox value).
    :param y: Second coordinate of the centre (second and fourth bbox value).
    :param d: Half-width of the box, in the same unit as ``x`` and ``y``.
    :param verbose: Also print the API capabilities and the final URL.
    :param filename: When given, the XML is also written to this file.
    :param path: Directory joined in front of ``filename``.
    :return: The response body as text.
    :raises requests.exceptions.RequestException: If a request fails or
        the server stops responding for longer than the timeout.
    """
    # The capabilities document is only ever printed, so skip the extra
    # round-trip unless it will be shown.
    if verbose:
        cap = requests.get('http://api.openstreetmap.org/api/capabilities',
                           timeout=60)
        print(cap.text)

    origin = np.array([x, y])
    bounds = (origin - d).tolist() + (origin + d).tolist()
    bbox = ','.join(str(round(value, 4)) for value in bounds)

    # The URL is built by hand because passing the bbox through `params`
    # would percent-encode its commas.
    string = 'http://api.openstreetmap.org/api/0.6/map?bbox={0}'.format(bbox)
    req = requests.get(string, timeout=60)
    if verbose:
        print(req.url)

    if filename:
        # Fix the encoding so the output does not depend on the platform's
        # default locale.
        with open(os.path.join(path, filename), 'w', encoding='utf-8') as fh:
            fh.write(req.text)
    return req.text
# `pandas.np` was only an alias for numpy and has been removed from pandas.
import numpy as np

# Fit an LDA topic model on the training tweets of one disease and print the
# highest-weighted words of every topic.
disease = 'hiv'
categories = ['celeb', 'dont_know', 'family', 'himself', 'knows', 'none', 'subject']

dataAdapter = DataAdapter(disease)

# 1. Generate training set by splitting the input files into multiple files (file per tweet)
dataAdapter.create_data(disease)

# 2. Load train data from files or cache
trainData = dataAdapter.get_data(categories=categories, subset='train')

# Postprocessing (urls, numbers and user references replacement)
preproccessor = PreProccessor()
preproccessor.perform(trainData.data)

vectorizer = CountVectorizer(ngram_range=(1, 1), stop_words='english', preprocessor=GetTextFromTweet)
matrix = vectorizer.fit_transform(trainData.data)
# NOTE(review): recent scikit-learn releases expose this as
# get_feature_names_out() - confirm the installed version.
feature_names = vectorizer.get_feature_names()
vocab = feature_names

model = lda.LDA(n_topics=10, n_iter=500, random_state=1)
model.fit(matrix)

n_top_words = 10
# The vocabulary does not change between topics, so convert it once.
np_array_vocab = np.array(vocab)
for i, topic_dist in enumerate(model.topic_word_):
    np_sorted_by_topic_dist = np.argsort(topic_dist)
    # Walk the ascending order backwards and keep exactly n_top_words
    # entries; a stop of -n_top_words would yield one word too few.
    topic_words = np_array_vocab[np_sorted_by_topic_dist][:-(n_top_words + 1):-1]
    print('Topic {}: {}'.format(i, ' '.join(topic_words)))
# TEST_PORTION = 0.25 # test_set_i = np.array(sorted(random.sample(xrange(N), int(TEST_PORTION * N)))) # train_set_i = np.array([i for i in range(N) if i not in test_set_i]) # ts_ids = df.index.values[train_set_i] # ts_targets = df.target.values # ts_features = df[train_feature_names].values train_label_set = sample_label_set train_feature_names = df.columns[:-1] # 93 features, last col is target (true classification) train_ids = df.index.values[train_set_i] train_targets = df.target.values train_features = df[train_feature_names].values del df test_target = np.array(df.target)[test_set_i] test_set = df.values[test_set_i] train_set = data[train_set_i] train_target = np.array(digits.target)[train_set_i] X_train = X[:train_samples] X_test = X[train_samples:] y_train = y[:train_samples] y_test = y[train_samples:] test = pd.read_csv(DATA_PATH + "test.csv") # only realy need this to make sure you get the Category labels right and in the right order sample_submission = pd.read_csv(DATA_PATH + "sampleSubmission.csv") sample_label_set = sample_submission.columns[1:] # e.g. Class_1, Class_2, ...