def main():
    if len(sys.argv) != 2:
        print "Incorrect usage. Use as follows:"
        print "python main.py <STOCK_NAME>"
        return

    stock = sys.argv[1].lower()

    print "=" * 100
    print "FETCHING DATA FOR STOCK: " + stock
    print "=" * 100
    data = ed.extract_data(stock)

    print
    print
    print "=" * 100
    print "GENERATING AUDIO FILE"
    print "=" * 100
    beat_box.generate(stock, data)

    print
    print
    print "=" * 100
    print "PUSHING TO GITHUB"
    print "=" * 100
    github_sync.sync_with_github()

    print
    print
    print "=" * 100
    print "DONE :)"
    print "=" * 100
def proj_length():
    session['length'] = int(request.form['length'])  # length in periods
    session['length_years'] = int(math.ceil(session['length'] / float(session['periods'])))

    if session['subnational'] == False:
        data, session['pop'] = ed.extract_data(session['iso3'], start=START)
        for k in ['decline', 'detected_mdr', 'total_mdr', 'p_ret_rel', 'p_new',
                  'prob_mdr_new', 'prob_mdr_re', 'prev_100_k']:
            session[k] = data[k]
    else:
        session['pop'] = {y: 0 for y in range(START, 2031)}
        for k in ['decline', 'detected_mdr', 'total_mdr', 'p_ret_rel', 'p_new',
                  'prob_mdr_new', 'prob_mdr_re', 'prev_100_k']:
            session[k] = None

    # years for which we need population data
    pop_years = range(session['hist_years'][0],
                      session['hist_years'][-1] + session['length_years'] + 1)

    # divide years in 2 columns for layout reasons
    pop_years_1 = pop_years[:len(pop_years) / 2 + 1]
    pop_years_2 = pop_years[len(pop_years) / 2 + 1:]

    # save as all_years to be consistent with non-manual sessions
    session['all_years'] = pop_years

    return render_template("pop_estimates.html",
                           pop_years_1=pop_years_1,
                           pop_years_2=pop_years_2,
                           pop_dict=session['pop'],
                           region=session['subnational'])
def data_country():
    # session using WHO data (not manually inserted)
    session['manual'] = False
    session['subnational'] = False
    # WHO data is yearly (1 period per year)
    session['periods'] = 1
    session['country'] = request.form['country']

    with open('tbprojections/country_names.pickle', 'r') as f:
        names = cPickle.load(f)
    for (k, v) in names.items():
        if v == session['country']:
            session['iso3'] = k

    # if the user selected a country for which enough data is available
    if ut._verify_ISO3(session['iso3']):
        # Extract the data
        # ================
        # 'data' is a dictionary (see extract_data.py)
        # 'pop' is a dictionary containing UN estimates for population
        # Inside 'data':
        # 'p_new': proportion new cases among all
        # 'p_ret_rel': proportion retreat - relapse cases among all
        # 'prob_mdr_new': probability new case having MDR
        # 'prob_mdr_re': probability retreat - relapse case having MDR
        # 'decline': estimated decline in incidence
        # 'prev_100_k': estimated prevalence for 100k population
        # 'detected_mdr': detected MDR cases in last year
        # 'total_mdr': estimated MDR cases in last year
        session['all_data'], session['pop'] = ed.extract_data(session['iso3'], start=START)

        # convert keys to integers (issue with storing in 'session')
        session['pop'] = {int(k): int(v) for (k, v) in session['pop'].items()}

        # Projection length
        # =================
        session['length'] = int(request.form['length'])  # in periods
        session['length_years'] = session['length']      # in years

        # Years for which we need population data are all_years (history +
        # projection years)
        act_year = session['all_data']['years'][-1][0]
        first_year = max(START, session['all_data']['years'][0][0])
        session['all_years'] = range(first_year, act_year + session['length'] + 1)

        # Divide years in 2 columns for layout reasons
        pop_years_1 = session['all_years'][:len(session['all_years']) / 2 + 1]
        pop_years_2 = session['all_years'][len(session['all_years']) / 2 + 1:]

        # Display following page
        return render_template("pop_estimates.html",
                               pop_years_1=pop_years_1,
                               pop_years_2=pop_years_2,
                               region=False,
                               pop_dict=session['pop'])
    # if the user selected an invalid country ...
    else:
        return render_template("error.html", error=e.error_country())
def index():
    form = AvatarForm()
    if request.method == 'GET':
        return render_template('index.html', form=form)
    url = request.form['url']
    # img = request.form['img']
    return extract_data(url)
def contact(phone_number, country):
    country_data = extract_data()[country]
    try:
        result = sendSMS(phone_number, country, country_data)
        print(result)
        response = jsonify("Success!")
    except:
        response = jsonify(
            "Failed to send message - SMS API may be overloaded")
    response.headers.add("Access-Control-Allow-Origin", "*")
    return response
def price_watch(url):
    product_name, product_price, currency, avail = extract_data(url)
    timestamp = str(datetime.now())
    json_dict = {
        "Product name": product_name,
        "Product price": product_price,
        "Currency": currency,
        "Availability": avail,
        "URL": url,
        "Timestamp": timestamp
    }
    print(f"{product_name:50}{product_price:8} {currency:5}({avail})")
    return json_dict
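# A minimal usage sketch (not part of the original source): the watched URL and
# the output file name below are hypothetical, and it assumes price_watch() and
# its extract_data() dependency are importable in the current module.
import json

if __name__ == "__main__":
    watched_url = "https://example.com/some-product"  # hypothetical URL
    record = price_watch(watched_url)
    # append the snapshot to a local JSON-lines log
    with open("price_history.jsonl", "a", encoding="utf-8") as log_file:
        log_file.write(json.dumps(record) + "\n")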
def train_model():
    extract_data.extract_data()

    print "Saving a train LMDB into the {0} directory".format(TRAIN_LMDB)
    create_lmdb(TRAIN_FILE, EXTRACT_DIR, TRAIN_LMDB)

    print "Saving a test LMDB into the {0} directory".format(TEST_LMDB)
    create_lmdb(TEST_FILE, EXTRACT_DIR, TEST_LMDB)

    print "Creating a train prototxt file with batch size {0} into {1}".format(
        TRAIN_BATCH_SIZE, TRAIN_LMDB)
    with open(TRAIN_PROTOTXT, 'w') as f:
        f.write(str(snooker_net(TRAIN_LMDB, TRAIN_BATCH_SIZE)))

    print "Creating a test prototxt file with batch size {0} into {1}".format(
        TEST_BATCH_SIZE, TEST_LMDB)
    with open(TEST_PROTOTXT, 'w') as f:
        f.write(str(snooker_net(TEST_LMDB, TEST_BATCH_SIZE)))

    print "Creating a solver prototxt into the {0} file".format(SOLVER_PROTOTXT)
    with open(SOLVER_PROTOTXT, 'w') as f:
        f.write(str(create_solver(TRAIN_PROTOTXT, TEST_PROTOTXT)))

    print "Training the model"
    results = train(SOLVER_PROTOTXT, METHOD)

    print "Saving the trained model to {0} and {1}".format(MODEL_OUTPUT, MODEL_PTX)
    results.net.save(MODEL_OUTPUT)
    with open(MODEL_PTX, 'w') as f:
        f.write(str(snooker_net(TRAIN_LMDB, TRAIN_BATCH_SIZE, output_type="Deploy")))

    print "Saving accuracy chart to {0}".format(ACCURACY_CHART)
    save_accuracy(results, ACCURACY_CHART)
def graph_eval(dataset_loc, input_graph_def, graph, input_node, output_node, batchsize):
    input_graph_def.ParseFromString(tf.gfile.GFile(graph, "rb").read())

    # Extract data from csv files again
    training_dataset_filepath = '%ssign_mnist_train/sign_mnist_train.csv' % dataset_loc
    testing_dataset_filepath = '%ssign_mnist_test/sign_mnist_test.csv' % dataset_loc
    train_data, train_label, val_data, val_label, testing_data, testing_label = extract_data(
        training_dataset_filepath, testing_dataset_filepath, 0)

    total_batches = int(len(testing_data) / batchsize)

    # Import graph
    tf.import_graph_def(input_graph_def, name='')

    # Get input placeholders & tensors
    images_in = tf.get_default_graph().get_tensor_by_name(input_node + ':0')
    labels = tf.placeholder(tf.int32, shape=[None, 25])

    # Get output tensors
    logits = tf.get_default_graph().get_tensor_by_name(output_node + ':0')
    predicted_logit = tf.argmax(input=logits, axis=1, output_type=tf.int32)
    ground_truth_label = tf.argmax(labels, 1, output_type=tf.int32)

    # Define the metric and update operations
    tf_metric, tf_metric_update = tf.metrics.accuracy(
        labels=ground_truth_label, predictions=predicted_logit, name='acc')

    with tf.Session() as sess:
        progress = ProgressBar()
        sess.run(tf.initializers.global_variables())
        sess.run(tf.initializers.local_variables())

        # process all batches
        for i in progress(range(0, total_batches)):
            # fetch a batch from the test dataset
            x_batch, y_batch = testing_data[i * batchsize:i * batchsize + batchsize], \
                               testing_label[i * batchsize:i * batchsize + batchsize]

            # Run graph for accuracy node
            feed_dict = {images_in: x_batch, labels: y_batch}
            acc = sess.run(tf_metric_update, feed_dict)

        print('Graph accuracy with validation dataset: {:1.4f}'.format(acc))

    return
def train_nn(dataset_loc, train_bool, num_test):
    if (train_bool):
        # Set parameters
        DATASET_SIZE = 27455
        BATCHSIZE = 32
        EPOCHS = 10
        LEARN_RATE = 0.0002
        DECAY_RATE = 3e-6
        NUM_IMAGES = 10

        # Pre-process data and train the neural network
        # Get the column names from the first row of the csv file
        training_dataset_filepath = '%ssign_mnist_train/sign_mnist_train.csv' % dataset_loc
        testing_dataset_filepath = '%ssign_mnist_test/sign_mnist_test.csv' % dataset_loc
        train_data, train_label, val_data, val_label, testing_data, testing_label = extract_data(
            training_dataset_filepath, testing_dataset_filepath, num_test)

        model = neural_network()
        model.compile(loss='categorical_crossentropy',
                      optimizer=optimizers.Adam(lr=LEARN_RATE, decay=DECAY_RATE),
                      metrics=['accuracy'])
        model.fit(train_data,
                  train_label,
                  batch_size=BATCHSIZE,
                  shuffle=True,
                  epochs=EPOCHS,
                  validation_data=(val_data, val_label))

        # Evaluate model accuracy
        scores = model.evaluate(testing_data, testing_label, batch_size=BATCHSIZE)
        print('Loss: %.3f' % scores[0])
        print('Accuracy: %.3f' % scores[1])

        # save weights, model architecture & optimizer to an HDF5 format file
        model.save(os.path.join('./train', 'keras_trained_model.h5'))
        print('Finished Training')

        print('Convert Keras to TF')
        keras2tf('train/keras_trained_model.h5', 'train/tfchkpt.ckpt', 'train')
def generateReport(self):
    popup = QMessageBox()
    popup.setIcon(QMessageBox.Critical)
    popup.setWindowTitle("RBA Generator")

    if self.logFile == None:
        popup.setText("Select a file")
        popup.exec()
        return
    if self.puList.currentIndex() == 0:
        popup.setText("Select valid PU")
        popup.exec()
        return
    if self.duList.currentIndex() == 0:
        popup.setText("Select valid DU")
        popup.exec()
        return
    if self.accountsList.currentIndex() == 0:
        popup.setText("Select valid Account")
        popup.exec()
        return

    # call function to generate report of the file and get the file name
    name_list = [
        self.puList.currentText(),
        self.duList.currentText(),
        self.accountsList.currentText()
    ]
    Ui_MainWindow.pu_du_list = name_list
    self.finalFileName = extract_filename.extract_filename(self.logFile)
    self.finalFileName = self.finalFileName + "_utilization_report_"
    self.finalFileName = extract_data.extract_data(name_list, self.logFile, self.finalFileName)
    Ui_MainWindow.globalFilename = self.finalFileName
    self.reportFileName.setText(self.finalFileName)
    self.downloadReport.setEnabled(True)
    self.emailReport.setEnabled(True)
    self.downloadButton.setEnabled(True)
def train_nn(dataset_loc, train_bool, num_test):
    if (train_bool):
        DATASET_SIZE = 27455
        BATCHSIZE = 32
        EPOCHS = 3
        LEARN_RATE = 0.0001
        DECAY_RATE = 1e-6
        NUM_IMAGES = 10

        training_dataset_filepath = '%smnist/sign_mnist_train/sign_mnist_train.csv' % dataset_loc
        testing_dataset_filepath = '%smnist/sign_mnist_test/sign_mnist_test.csv' % dataset_loc
        train_data, train_label, val_data, val_label, testing_data, testing_label = extract_data(
            training_dataset_filepath, testing_dataset_filepath, num_test)

        model = neural_network()
        model.compile(loss='categorical_crossentropy',
                      optimizer=optimizers.Adam(lr=LEARN_RATE, decay=DECAY_RATE),
                      metrics=['accuracy'])
        model.fit(train_data,
                  train_label,
                  batch_size=BATCHSIZE,
                  shuffle=True,
                  epochs=EPOCHS,
                  validation_data=(val_data, val_label))

        scores = model.evaluate(testing_data, testing_label, batch_size=BATCHSIZE)
        print('Loss: %.3f' % scores[0])
        print('Accuracy: %.3f' % scores[1])

        model.save(os.path.join('./train', 'keras_trained_model.h5'))
        print('Finished Training')

        print('Convert Keras to TF')
        keras2tf('train/keras_trained_model.h5', 'train/tfchkpt.ckpt', 'train')
def classify():
    model, total_wise, total_future = train_model()
    test_data, test_labels = extract_data(data_filename, label_filename)
    results = []
    correct_counter = 0

    for i, sentence in enumerate(test_data):
        array_sentence = sentence.split(' ')
        wise_probability, future_probability = compute_sentence_probability(
            model, total_wise, total_future, array_sentence)
        class_prediction = predict_class(wise_probability, future_probability)
        results.append({
            "sentence": sentence,
            "prediction": class_prediction,
            "actual": test_labels[i]
        })
        if (class_prediction == int(test_labels[i])):
            correct_counter += 1

    for result in results:
        print(result)
    print("Accuracy: {0:.2f}%".format(
        (correct_counter / len(test_labels)) * 100))
def lambda_handler(event, context):
    """
    Parameters
    ----------
    event: dict, required
        keys:
            environment: str in {'production', 'testing'}
                determines which download URLs to use

    context: object, required
        Lambda Context runtime methods and attributes

    Returns
    -------
    Invocation is Asynchronous
    """
    env = event["environment"]

    ny_times_data, jh_data, prev_data = extract_data(env, BUCKET_NAME, KEY, s3)
    transformed_data, new_records, updated_records = transform_data(
        ny_times_data, jh_data, prev_data)
    load_data(env, BUCKET_NAME, KEY, CHANGE_LOG, transformed_data,
              new_records, updated_records, s3)

    if prev_data is None:
        return {
            "Status": "New Data Loaded",
            "New Records": str(len(new_records)),
            "Updated Records": "--"
        }
    return {
        "Status": "Daily Data Updated",
        "New Records": str(len(new_records)),
        "Updated Records": str(len(updated_records))
    }
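# A minimal local-invocation sketch (not from the original source): it only shows
# the event shape implied by the docstring above, and it assumes the module-level
# BUCKET_NAME, KEY, CHANGE_LOG and s3 objects, plus extract_data / transform_data /
# load_data, are already configured as in the surrounding module.
if __name__ == "__main__":
    sample_event = {"environment": "testing"}  # 'production' or 'testing'
    result = lambda_handler(sample_event, context=None)
    print(result)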
def histogram_bounds():
    # Comment out/delete the feature you want to remove
    x, y = ed.extract_data()
    x1 = np.zeros((len(x), 1))
    x2 = np.zeros((len(x), 1))
    x3 = np.zeros((len(x), 1))
    x4 = np.zeros((len(x), 1))
    for i in range(len(x)):
        x1[i] = x[i][0]
        x2[i] = x[i][1]
        x3[i] = x[i][2]
        x4[i] = x[i][3]
    min_x1, max_x1 = mf.find_smallest_and_biggest_value(x1)
    min_x2, max_x2 = mf.find_smallest_and_biggest_value(x2)
    min_x3, max_x3 = mf.find_smallest_and_biggest_value(x3)
    min_x4, max_x4 = mf.find_smallest_and_biggest_value(x4)
    min_list = [min_x1, min_x2, min_x3, min_x4]
    max_list = [max_x1, max_x2, max_x3, max_x4]
    return min_list, max_list
def generate_full_csv(path, write_file):
    all_wav_files_ordered = []
    wav_file_sizes = []
    text_transcriptions = []
    speakers = []
    sexes = []
    births = []
    youths = []
    cnt = 0
    counter_short_long = 0

    for path, subdirs, files in os.walk(path):
        for name in files:
            # if len(all_wav_files_ordered) > 20000:
            #     break
            if not name.endswith('spl'):
                continue
            spl_path = os.path.join(path, name)
            speaker, name, sex, region_of_birth, region_of_youth, wavs_and_transcriptions = extract_data(spl_path)
            if not sex:
                continue
            wav_path = spl_path.replace('.spl', '/').replace('data', 'speech')
            for key in wavs_and_transcriptions:
                final_path = f'{wav_path}{key}'
                if not os.path.isfile(final_path):
                    continue
                if final_path in excluded_train_wavs:
                    print(final_path)
                    continue
                # f = sf.SoundFile(final_path)
                # duration = len(f) / f.samplerate
                # if duration < 4:
                #     counter_short_long += 1
                #     continue
                cnt += 1
                if cnt % 10000 == 0:
                    print(cnt)
                # if train, then those produce inf in loss
                # if cnt >= 60000 and cnt <= 70000:
                #     print(cnt)
                #     continue
                # if cnt >= 110000 and cnt <= 150000:
                #     print(cnt)
                #     continue
                # if test, those produce inf loss
                if cnt > 15000 and cnt < 19000:
                    continue
                lower_text = wavs_and_transcriptions[key].lower() \
                    .replace(',', ' ').replace('è', "e").replace('é', "e") \
                    .replace("ÿ", "y").replace("ü", "u").replace('î', 'i')
                for c in string.punctuation:
                    lower_text = lower_text.replace(c, " ")
                # features = audiofile_to_input_vector(final_path, numcep, numcontext)
                # features_len = len(features) - 2 * numcontext
                # transcript = text_to_char_array(lower_text, alphabet)
                # if features_len < len(transcript):
                #     counter_short_long += 1
                #     print(final_path)
                #     continue
                speakers.append(speaker)
                all_wav_files_ordered.append(final_path)
                wav_file_sizes.append(os.path.getsize(final_path))
                text_transcriptions.append(lower_text)
                sexes.append(sex)
                births.append(region_of_birth)
                youths.append(region_of_youth)

    group_births = Counter(births)
    # num_test_data = int(test_percent * len(all_wav_files_ordered))
    train_wav_indices = []
    dev_wav_indices = []
    for region in group_births.keys():
        # dict_with_region_indices[region] = np.where(group_births == region)[0]
        indices = np.where(np.array(births) == region)[0]
        # if 'Stockholm' in region:
        #     indices = indices[0:20000]
        # elif 'Göteborg' in region:
        #     indices = indices[0:10000]
        # else:
        indices = indices[0:5000]
        num_train_data = int(training_percent * len(indices))
        train_indices = np.array(indices[0:num_train_data])
        dev_indices = np.array(indices[num_train_data:])
        train_wav_indices.extend([i for i in train_indices])
        dev_wav_indices.extend([i for i in dev_indices])

    train_wav_files = [all_wav_files_ordered[i] for i in train_wav_indices]
    dev_wav_files = [all_wav_files_ordered[i] for i in dev_wav_indices]
    # test_wav_files = all_wav_files_ordered[num_train_data + num_dev_data:]

    train_wav_file_sizes = [wav_file_sizes[i] for i in train_wav_indices]
    dev_wav_file_sizes = [wav_file_sizes[i] for i in dev_wav_indices]
    # test_wav_file_sizes = wav_file_sizes[num_train_data + num_dev_data:]

    train_transcriptions_files = [text_transcriptions[i] for i in train_wav_indices]
    dev_transcriptions_files = [text_transcriptions[i] for i in dev_wav_indices]
    # test_transcriptions_files = text_transcriptions[num_train_data + num_dev_data:]

    train_dict = {"wav_filename": train_wav_files,
                  "wav_filesize": train_wav_file_sizes,
                  "transcript": train_transcriptions_files}
    val_dict = {"wav_filename": dev_wav_files,
                "wav_filesize": dev_wav_file_sizes,
                "transcript": dev_transcriptions_files}
    # test_dict = {"wav_filename": test_wav_files,
    #              "wav_filesize": test_wav_file_sizes,
    #              "transcript": test_transcriptions_files}

    # write_to_csv(filename='/home/guest/Desktop/DeepSpeech/data/TRAIN/train.csv', dictionary=train_dict)
    # write_to_csv(filename='/home/guest/Desktop/DeepSpeech/data/DEV/dev.csv', dictionary=val_dict)
    # write_to_csv(filename='/home/guest/Desktop/DeepSpeech/data/TEST/test.csv', dictionary=test_dict)

    # unique_transcr = set(text_transcriptions)
    # list_ = list(unique_transcr)
    #
    # print('text : ', len(text_transcriptions))
    # print('unique : ', len(list_))
    #
    # final_dict = {"transcript": list_}
    # write_to_csv_2(filename='/home/guest/Desktop/Dataset16/sve.16khz.0467-1/0467_sv_train_1/stah2.csv', dictionary=final_dict)

    final_dict = {"wav_filename": all_wav_files_ordered,
                  "wav_filesize": wav_file_sizes,
                  "transcript": text_transcriptions}
    write_to_csv(filename=write_file, dictionary=train_dict)

    print('============ALL=============')
    group_sexes = Counter(sexes)
    group_births = Counter(births)
    group_youths = Counter(youths)
    print('=========SEXES==========')
    for sex in group_sexes.keys():
        print("key : " + sex + 'with value : ' + str(group_sexes[sex]))
    # print(group_sexes.values())
    # print(group_sexes.keys())
    print('===================')
    print('=========births==========')
    for sex in group_births.keys():
        print("key : " + sex + 'with value : ' + str(group_births[sex]))
    # print(group_births.values())
    # print(group_births.keys())
    print('===================')
    print('=========youths==========')
    for sex in group_youths.keys():
        print("key : " + sex + 'with value : ' + str(group_youths[sex]))
    # print(group_births.values())
    # print(group_youths.values())
    # print(group_youths.keys())
    print('===================')
    print(counter_short_long)

    print('============TRAIN=============')
    group_sexes = Counter([sexes[i] for i in train_wav_indices])
    group_births = Counter([births[i] for i in train_wav_indices])
    group_youths = Counter([youths[i] for i in train_wav_indices])
    print('=========SEXES==========')
    for sex in group_sexes.keys():
        print("key : " + sex + 'with value : ' + str(group_sexes[sex]))
    print('===================')
    print('=========births==========')
    for sex in group_births.keys():
        print("key : " + sex + 'with value : ' + str(group_births[sex]))
    print('===================')
    print('=========youths==========')
    for sex in group_youths.keys():
        print("key : " + sex + 'with value : ' + str(group_youths[sex]))
    print('===================')

    print('============DEV=============')
    group_sexes = Counter([sexes[i] for i in dev_wav_indices])
    group_births = Counter([births[i] for i in dev_wav_indices])
    group_youths = Counter([youths[i] for i in dev_wav_indices])
    print('=========SEXES==========')
    for sex in group_sexes.keys():
        print("key : " + sex + 'with value : ' + str(group_sexes[sex]))
    print('===================')
    print('=========births==========')
    for sex in group_births.keys():
        print("key : " + sex + 'with value : ' + str(group_births[sex]))
    print('===================')
    print('=========youths==========')
    for sex in group_youths.keys():
        print("key : " + sex + 'with value : ' + str(group_youths[sex]))
    print('===================')
def data(filename=None):
    response = jsonify(extract_data())
    response.headers.add("Access-Control-Allow-Origin", "*")
    return response
a. If there are multiple recordings plus comments, pick the first one?
b. Check whether the entry is blank
'''
import json
from phrasing_url import phrasing_url
from extract_data import extract_data
from download import download

initial = "https://tft.rocks/topic/"
i = 1
tft_list = []
list_issue = [216, 471, 491, 655, 656, 657, 658]
while i < 659:
    if i in list_issue:
        url_issue = initial + str(i)
        dic = {"refs": i, "url": url_issue}
    else:
        html = phrasing_url(initial, i)
        dic = extract_data(html, i)
    print(dic)
    tft_list.append(dic)
    i = i + 1

# print(data)
# print(dic)
with open('tftrocks.txt', 'w', encoding='utf-8') as f:
    # f.write(json.dumps(dic, ensure_ascii=False, indent=2))
    json.dump(tft_list, f, ensure_ascii=False, indent=2)
    # f.write(json.dumps(i))
def run_interrupt(file):
    """
    run all sci run plot routines
    input:  file    --- input file name. if it is not given, the script will ask
    output: <plot_dir>/*.png    --- ace data plot
            <ephin_dir>/*.png   --- ephin data plot
            <goes_dir>/*.png    --- goes data plot
            <xmm_dir>/*.png     --- xmm data plot
            <html_dir>/*.html   --- html page for that interruption
            <web_dir>/rad_interrupt.html    --- main page
    """
#
#--- check whether the input file exists; if not, ask
#
    test = exc_dir + file
    if not os.path.isfile(test):
        file = raw_input('Please put the interrupt timing list: ')

    if file == 'test':
#
#--- if this is a test case, prepare for the test
#
        comp_test = 'test'
        file = test_web_dir + 'test_date'
    else:
        comp_test = 'NA'
#
#--- extract data
#
    print "Extracting Data"
    edata.extract_data(file)

    f = open(file, 'r')
    data = [line.strip() for line in f.readlines()]
    f.close()

    for ent in data:
        atemp = re.split('\s+|\t+', ent)
        event = atemp[0]
        start = atemp[1]
        stop = atemp[2]
        gap = atemp[3]
        type = atemp[4]

        print "PLOTTING: " + str(event)
#
#--- plot Ephin data
#
        print "EPHIN"
        ephin.plotEphinMain(event, start, stop, comp_test)
#
#---- plot GOES data
#
        print "GOES"
        goes.plotGOESMain(event, start, stop, comp_test)
#
#---- plot other radiation data (from NOAA)
#
        print "NOAA"
        noaa.startACEPlot(event, start, stop, comp_test)
#
#---- extract and plot XMM data
#
        print "XMM"
        xmm.read_xmm_and_process(event)
#
#---- create individual html page
#
        print "HTML UPDATE"
        srphtml.printEachHtmlControl(file)
#
#---- update main html page
#
    srphtml.printEachHtmlControl()
def chisquareintf(root, name, temp, dist, powlist, denlist, inclist, verbose=0):
    cspdata = np.zeros((len(powlist), len(denlist)))
    bestinc = np.zeros((len(powlist), len(denlist)))

    # loop through densities & power law indexes
    for r in np.arange(0, len(powlist)):
        for c in np.arange(0, len(denlist)):
            incchi = []
            currbestchisq = 1000000  # arbitrary, just has to be very large
            currbestpow = 100        # arbitrary
            currbestden = 100        # arbitrary
            currbestinc = 100        # arbitrary
            for k in np.arange(0, len(inclist)):
                imagefile = open(
                    root + 'IMAGE.' + name + '_t' + temp + '_w0d0_j0h0_' +
                    powlist[r] + '_' + denlist[c] + '_rd12p0_cont_alp0p0_i' +
                    inclist[k], 'r')
                templine = imagefile.readline()
                # get dimensions
                width = int(templine[18:23])
                height = int(templine[26:])
                imagefile.readline()
                imagefile.readline()
                image = np.zeros((height, width))
                for line in imagefile:
                    w = int(line[0:5])
                    h = int(line[6:10])
                    intensity = line[65:75]
                    image[h - 1][w - 1] = intensity
                imagefile.close()
                # norm to 1
                image = image / np.sum(image)
                # chi square statistic
                realdata, imagdata = ext.extract_data(image, u, v, im_scale=0.007,
                                                      cubic=1, nohan=1)
                modelvis2 = realdata**2 + imagdata**2
                chisq = 1 / float(np.shape(u)[0] - 3) * sum(
                    (vis2data - modelvis2)**2 / (0.05 * vis2data)**2)
                incchi.append(chisq)
                # pick best inclination
                if np.abs(chisq - 1) < np.abs(currbestchisq - 1):
                    currbestchisq = chisq
                    currbestpow = powlist[r]
                    currbestden = denlist[c]
                    currbestinc = inclist[k]
            if verbose == 1:
                print powlist[r], denlist[c], '\n', incchi, '\n'
            # enter chi square and best inclination
            cspdata[r, c] = np.min(incchi)
            bestinc[r, c] = np.float(currbestinc)

    print "\nchi square grid"
    print np.flipud(cspdata)

    # table of densities vs power law indexes
    fig, csp = plt.subplots()
    heatmap = csp.pcolor(np.abs(1 - 1. / cspdata), cmap=plt.cm.hot_r)
    # gray_r    : white is low chisq (good), black is high chisq (bad)
    # hot_r     : light is low, dark is high
    # rainbow_r : red is low, blue is high
    csp.set_xticks(np.arange(cspdata.shape[1]) + 0.5, minor=False)
    csp.set_yticks(np.arange(cspdata.shape[0]) + 0.5, minor=False)
    csp.set_xticklabels(denlist, fontweight='bold', minor=False)
    csp.set_yticklabels(powlist, fontweight='bold', minor=False)

    # print chi-squared statistic and best inclination on each block
    for r in range(cspdata.shape[0]):
        for c in range(cspdata.shape[1]):
            csp.text(c + 0.5, r + 0.5, '{0:.6f}'.format(cspdata[r, c]),
                     horizontalalignment='center', verticalalignment='bottom')
            csp.text(c + 0.5, r + 0.5, 'Inc = ' + str(bestinc[r, c]),
                     horizontalalignment='center', verticalalignment='top')

    plt.title('Chi-Square Plot - H-band Interferometry', fontsize=16, fontweight='bold')
    plt.xlabel('Initial Density (rho_0)', fontsize=12, fontweight='bold')
    plt.ylabel('Power Law Index', fontsize=12, fontweight='bold')
    plt.show()
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--max_epochs', default=20, type=int, help='no. of epochs')
    parser.add_argument('--batch_size', default=64, type=int, help='batch size')
    parser.add_argument('--patience', default=5, type=int)
    parser.add_argument('--cos_sim_dim', default=20, type=int)
    parser.add_argument('--lr', default=0.005, type=float)
    parser.add_argument('--debug', action='store_true')
    parser.add_argument('--is_pretrained', action='store_true')
    parser.add_argument('--embed_dim', default=300, type=int, help='embedding dimension')
    args = parser.parse_args()
    print(args)

    # load_and_split_data(file_path='./data/train.csv')
    train_data, val_data, test_data = extract_data(file_path='./data/train.csv')

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    best_val_loss = float("inf")
    best_val_acc = 0.0
    epochs = args.max_epochs  # The number of epochs
    best_model = None
    best_epoch = 0

    train_set = QQPDataset(train_data, split='train', vocab=None, word2idx=None,
                           pre_process=None, device=device, debug=args.debug)
    val_set = QQPDataset(val_data, split='val', vocab=train_set.vocab,
                         word2idx=train_set.word2idx, pre_process=None,
                         device=device, debug=args.debug)
    test_set = QQPDataset(test_data, split='test', vocab=train_set.vocab,
                          word2idx=train_set.word2idx, pre_process=None,
                          device=device, debug=args.debug)

    # use only first time for loading GloVe
    # parse_n_store_glove(file_path='./data')

    train_dataloader = DataLoader(train_set, batch_size=args.batch_size, shuffle=True)
    val_dataloader = DataLoader(val_set, batch_size=args.batch_size, shuffle=False)
    test_dataloader = DataLoader(test_set, batch_size=args.batch_size, shuffle=False)

    vocab_size = len(train_set.vocab)
    K = args.cos_sim_dim

    if args.is_pretrained:
        # use only once
        word2GloVe = create_word2GloVe_dict(file_path='./data')
        word2GloVe = pickle.load(open(f'./data/word2GloVe_dict.pkl', 'rb'))
        # generate GloVe embeddings for words in vocabulary
        vocab_embeddings = get_glove_embeddings(train_set.vocab, word2GloVe)
        vocab_embeddings = torch.from_numpy(vocab_embeddings).to(device)
        model = BiMPM(vocab_size, embed_dim=None, weight_matrix=vocab_embeddings,
                      hidden_size=100, K=K)
    else:
        embed_dim = args.embed_dim
        model = BiMPM(vocab_size, embed_dim, weight_matrix=None, hidden_size=100, K=K)
    model.to(device)

    lr = args.lr  # learning rate
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    # scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'max', factor=0.1,
    #                                                        patience=3, verbose=True)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=4, gamma=0.1)

    # for i_batch, sample_batched in enumerate(dataloader):
    train_accs = []
    val_accs = []
    train_losses = []
    val_losses = []
    best_model = None
    k = 0

    for epoch in range(1, epochs + 1):
        epoch_start_time = time.time()
        train_loss, train_acc = train(epoch, train_dataloader, model, optimizer)
        train_accs.append(train_acc)
        train_losses.append(train_loss)

        val_loss, val_acc, y_true, y_pred = evaluate(model, val_dataloader)
        val_accs.append(val_acc)
        val_losses.append(val_loss)

        print('-' * 89)
        print('| end of epoch {:3d} | time: {:5.2f}s | valid loss {} | '.format(
            epoch, (time.time() - epoch_start_time), val_loss))
        print('|Epoch {:3d} | valid accuracy {}'.format(epoch, val_acc))
        print('-' * 89)

        scheduler.step()

        if val_acc > best_val_acc:
            best_val_loss = val_loss
            best_val_acc = val_acc
            best_model = model
            best_epoch = epoch
            torch.save(model.state_dict(), "./best_model_lstm.pt")
            pickle.dump(y_true, open(f'best_val_true_labels.pkl', 'wb'))
            pickle.dump(y_pred, open(f'best_val_pred_labels.pkl', 'wb'))
            k = 0
        elif k < args.patience:
            k += 1
        else:
            break
        # scheduler.step()

    print('Best val loss: {} | acc: {} at epoch {}'.format(
        best_val_loss, best_val_acc, best_epoch))

    test_loss, test_acc, y_true, y_pred = evaluate(best_model, test_dataloader)
    print('Test | loss: {} | acc: {}'.format(test_loss, test_acc))
    pickle.dump(y_true, open(f'test_true_labels.pkl', 'wb'))
    pickle.dump(y_pred, open(f'test_pred_labels.pkl', 'wb'))

    log_results = {'train_acc': train_accs,
                   'train_loss': train_losses,
                   'val_acc': val_accs,
                   'val_loss': val_losses,
                   'best_loss': best_val_loss,
                   'best_acc': best_val_acc,
                   'test_loss': test_loss,
                   'test_acc': test_acc}
    pickle.dump(log_results, open(f'log_results.pkl', 'wb'))
def run_statistics(file_name, graph_name, parameter, axis):
    # excel initialize
    e = excel_tools()
    file_dir = 'C:\\Users\\Taire\\Desktop\\TylerPapers\\data_workspace\\{}_statistics.xls'.format(
        file_name)

    # mean and std vectors for each group and run
    exclusion_parameters = {}

    # wide group normal
    inclusion_parameters = {
        'group': 'wide',
        'gait': 'normal',
        'axis': axis,
        'parameter': parameter
    }
    extracted, idx = extract_data(data, inclusion_parameters, exclusion_parameters)
    constant_list, wide_normal = part_compute(extracted, 'name', np.mean)
    _, wide_normal_std = part_compute(extracted, 'name', np.std, ddof=1)
    # write excel
    e.write_column(e.mean_std, 'participant', constant_list)
    e.write_column(e.mean_std, 'wide_normal_means', wide_normal)
    e.write_column(e.mean_std, 'wide_normal_std', wide_normal_std)

    # wide group toe-in
    inclusion_parameters = {
        'group': 'wide',
        'gait': 'toe-in',
        'axis': axis,
        'parameter': parameter
    }
    extracted, idx = extract_data(data, inclusion_parameters, exclusion_parameters)
    _, wide_in = part_compute(extracted, 'name', np.mean)
    _, wide_in_std = part_compute(extracted, 'name', np.std, ddof=1)
    # write excel
    e.write_column(e.mean_std, '', [''])
    e.write_column(e.mean_std, 'wide_in_means', wide_in)
    e.write_column(e.mean_std, 'wide_in_std', wide_in_std)

    # narrow group normal
    inclusion_parameters = {
        'group': 'narrow',
        'gait': 'normal',
        'axis': axis,
        'parameter': parameter
    }
    extracted, idx = extract_data(data, inclusion_parameters, exclusion_parameters)
    constant_list, narrow_normal = part_compute(extracted, 'name', np.mean)
    _, narrow_normal_std = part_compute(extracted, 'name', np.std, ddof=1)
    # write excel
    e.write_column(e.mean_std, '', [''])
    e.write_column(e.mean_std, 'participant', constant_list)
    e.write_column(e.mean_std, 'narrow_normal_means', narrow_normal)
    e.write_column(e.mean_std, 'narrow_normal_std', narrow_normal_std)

    # narrow group toe-out
    inclusion_parameters = {
        'group': 'narrow',
        'gait': 'toe-out',
        'axis': axis,
        'parameter': parameter
    }
    extracted, idx = extract_data(data, inclusion_parameters, exclusion_parameters)
    _, narrow_out = part_compute(extracted, 'name', np.mean)
    _, narrow_out_std = part_compute(extracted, 'name', np.std, ddof=1)
    # write excel
    e.write_column(e.mean_std, '', [''])
    e.write_column(e.mean_std, 'narrow_out_means', narrow_out)
    e.write_column(e.mean_std, 'narrow_out_std', narrow_out_std)

    # determine amount of rounding
    if parameter in ['KNEE_IMPULSE', 'KNEE_MIN_Y']:
        round_dec = 4
    else:
        round_dec = 1

    # statistical analysis wide comparison
    _, wide_p = ttest_rel(wide_normal, wide_in)
    wide_d = cohens_d.within_group(wide_normal, wide_in)
    wide_normal_mean = str(np.round(np.mean(wide_normal), decimals=round_dec))
    wide_normal_std = str(np.round(np.std(wide_normal, ddof=1), decimals=round_dec))
    wide_in_mean = str(np.round(np.mean(wide_in), decimals=round_dec))
    wide_in_std = str(np.round(np.std(wide_in, ddof=1), decimals=round_dec))
    wide_stat = [
        wide_normal_mean + ' ± ' + wide_normal_std,
        wide_in_mean + ' ± ' + wide_in_std,
        str(np.round(wide_p, decimals=3)),
        str(np.round(wide_d, decimals=3))
    ]
    # write excel
    e.write_row(e.statistics, '', ['habitual', 'toe-in', 'P', 'd'])
    e.write_row(e.statistics, 'wide_stat', wide_stat)
    e.write_row(e.statistics, '', [''])
    # graph data
    graph_dir = 'C:\\Users\\Taire\\Desktop\\TylerPapers\\data_workspace\\{}'.format(
        file_name + '_wide')
    title = 'P = {}, d = {}'.format(wide_stat[-2], wide_stat[-1])
    plot_comparisons(title, graph_name, 'Habitual \n FPA', 'Toe-in \n FPA',
                     np.mean(wide_normal), np.std(wide_normal, ddof=1),
                     np.mean(wide_in), np.std(wide_in, ddof=1),
                     graph_dir, file_type='png')

    # statistical analysis narrow comparison
    _, narrow_p = ttest_rel(narrow_normal, narrow_out)
    narrow_d = cohens_d.within_group(narrow_normal, narrow_out)
    narrow_normal_mean = str(np.round(np.mean(narrow_normal), decimals=round_dec))
    narrow_normal_std = str(np.round(np.std(narrow_normal, ddof=1), decimals=round_dec))
    narrow_out_mean = str(np.round(np.mean(narrow_out), decimals=round_dec))
    narrow_out_std = str(np.round(np.std(narrow_out, ddof=1), decimals=round_dec))
    narrow_stat = [
        narrow_normal_mean + ' ± ' + narrow_normal_std,
        narrow_out_mean + ' ± ' + narrow_out_std,
        str(np.round(narrow_p, decimals=3)),
        str(np.round(narrow_d, decimals=3))
    ]
    # write excel
    e.write_row(e.statistics, '', ['habitual', 'toe-out', 'P', 'd'])
    e.write_row(e.statistics, 'narrow_stat', narrow_stat)
    e.write_row(e.statistics, '', [''])
    # graph data
    graph_dir = 'C:\\Users\\Taire\\Desktop\\TylerPapers\\data_workspace\\{}'.format(
        file_name + '_narrow')
    title = 'P = {}, d = {}'.format(narrow_stat[-2], narrow_stat[-1])
    plot_comparisons(title, graph_name, 'Habitual \n FPA', 'Toe-out \n FPA',
                     np.mean(narrow_normal), np.std(narrow_normal, ddof=1),
                     np.mean(narrow_out), np.std(narrow_out, ddof=1),
                     graph_dir, file_type='png')

    # statistical analysis habitual comparison
    _, habitual_p = ttest_ind(wide_normal, narrow_normal)
    habitual_d = cohens_d.between_group(wide_normal, narrow_normal)
    narrow_normal_mean = str(np.round(np.mean(narrow_normal), decimals=round_dec))
    narrow_normal_std = str(np.round(np.std(narrow_normal, ddof=1), decimals=round_dec))
    wide_normal_mean = str(np.round(np.mean(wide_normal), decimals=round_dec))
    wide_normal_std = str(np.round(np.std(wide_normal, ddof=1), decimals=round_dec))
    habitual_stat = [
        narrow_normal_mean + ' ± ' + narrow_normal_std,
        wide_normal_mean + ' ± ' + wide_normal_std,
        str(np.round(habitual_p, decimals=3)),
        str(np.round(habitual_d, decimals=3))
    ]
    # write excel
    e.write_row(e.statistics, '', ['narrow', 'wide', 'P', 'd'])
    e.write_row(e.statistics, 'habitual_stat', habitual_stat)
    e.write_row(e.statistics, '', [''])
    # graph data
    graph_dir = 'C:\\Users\\Taire\\Desktop\\TylerPapers\\data_workspace\\{}'.format(
        file_name + '_inter')
    title = 'P = {}, d = {}'.format(habitual_stat[-2], habitual_stat[-1])
    plot_comparisons(title, graph_name, 'Narrow \n FPA group', 'Wide \n FPA group',
                     np.mean(narrow_normal), np.std(narrow_normal, ddof=1),
                     np.mean(wide_normal), np.std(wide_normal, ddof=1),
                     graph_dir, file_type='png')

    # save results
    e.workbook.save(file_dir)
print('Completed in {0:.2f} seconds'.format(end - start))


if __name__ == "__main__":
    all_analytics = []
    with open("output.csv", "w") as file:
        file.write(",".join([
            "dataset", "kernel", "normalization", "accuracy", "precision",
            "recall", "fscore"
        ]) + "\n")

    for dataset in DATASETS:
        print("Preprocessing data from {0}".format(dataset))
        start = time.time()

        # extract real data from csv
        graphs = ed.extract_data(dataset)
        # generate fake data
        fake_graphs = ed.shuffle_graphs(graphs, classification_difficulty)
        # combining & shuffling real and fake data
        complete_data = ed.combine(graphs, fake_graphs)

        # splitting training & testing sets
        bound = int(len(complete_data) * training_ratio)
        training_set = complete_data[:bound]
        testing_set = complete_data[bound:]

        end = time.time()
        print("Preprocessing complete in {0:.2f} seconds".format(end - start))

        for kernel in KERNELS:
            for n in NORMALIZATIONS:
                da = DatasetAnalytics(dataset, kernel, n, training_set, testing_set)
import json
import datetime
import time
from phrasing_url import phrasing_url
from extract_data import extract_data

initial_url = "https://www.zhipin.com/c101280600/?query=%E6%95%B0%E6%8D%AE%E5%88%86%E6%9E%90&page="
dic = []
i = 1
while i < 11:
    print("\n\n" + "parsing page: " + str(i))
    url = initial_url + str(i) + '&ka=page-' + str(i)
    print(url)
    html = phrasing_url(url)
    dic.append(extract_data(html))
    print("Time now: " + str(datetime.datetime.now()))
    time.sleep(60)
    i = i + 1

with open('bosszhipin2.txt', 'w', encoding='utf-8') as f:
    f.write(json.dumps(dic, ensure_ascii=False))
""" Created on Fri Dec 20 15:32:21 2019 This is the master script that runs the following processes #1 Reading the observed surge time series for 902 tide gauges #2 Reading netcdf files to extract predictors for each tide gauge #3 Concatenating predictors with corresponding surge time series for each tide gauge and save as a dataframe #4 Saving the dataframe as a .csv in the ../pred_and_surge *delta: distance (in degrees) from the tide gauge - this will form a grid of delta x delta box around each tide gauge @author: Master Script """ import os os.chdir("E:\data\scripts\modeling_storm_surge") from extract_data import extract_data delta = 5 extract_data(delta)
# Creating endpoints
@app.route("/contacts")
def get_contacts():
    """
    This function returns a response in JSON format for the given endpoint.

    args: None
    Variables used: name, revenue_gte, company_id
    return: response <Json>
    """
    name = "{}".format(request.args.get('name', 'none'))
    revenue_gte = "{}".format(request.args.get('revenue_gte', 'none'))
    company_id = "{}".format(request.args.get('company_id', 'none'))

    # Filter based on name
    if name != 'none':
        data = json.dumps(extract_data(specific_contact_name=name), indent=2)
    # Filter based on greater than or equal to revenue given
    elif revenue_gte != 'none':
        data = json.dumps(extract_data(specific_revenue=revenue_gte), indent=2)
    # Filter based on company id
    elif company_id != 'none':
        data = json.dumps(extract_data(specific_company_id=company_id), indent=2)
    # Else fetch all contacts
    else:
        data = json.dumps(extract_data(), indent=2)

    response = app.response_class(
        response=data,
        status=200,
        mimetype='application/json'
    )
    return response
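# A hedged usage sketch (not from the original source): it assumes the Flask app
# above is running locally on port 5000 and that the `requests` package is
# installed; the host, port, and example query values are illustrative only.
import requests

BASE = "http://localhost:5000"

# fetch every contact
print(requests.get(f"{BASE}/contacts").json())
# filter by name, minimum revenue, or company id (one filter is applied at a time)
print(requests.get(f"{BASE}/contacts", params={"name": "Jane Doe"}).json())
print(requests.get(f"{BASE}/contacts", params={"revenue_gte": "100000"}).json())
print(requests.get(f"{BASE}/contacts", params={"company_id": "42"}).json())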
'''
1. The image URLs we need are for Project and Unitia.
   Unitia images start with https://ba.hitomi.la/galleries/1294943/ followed by the image name.
   Project images start with https://ba.hitomi.la/galleries/1286145/
2. The image names are stored in a list.
'''
from extract_data import extract_data
from download import download

unitia = "https://hitomi.la/reader/1294943.html"
project = "https://hitomi.la/reader/1286145.html"

unitia_img = extract_data(unitia)
unitia_id = unitia.split('reader' + '/')[1].split('.html')[0]
print(unitia_id)
download(unitia_img, unitia_id)
help="regularization coefficient", required=True, ) args = parser.parse_args() commondir = args.scriptdir sys.path.append(commondir) from utils import load_data from extract_data import extract_data # load data into Pandas dataframe data_dir = args.datadir if not os.path.exists(os.path.join(data_dir, "energy.csv")): extract_data(data_dir) energy = load_data(data_dir) # parse values of hyperparameters T = int(args.T) LATENT_DIM_1 = int(args.LATENT_DIM_1) LATENT_DIM_2 = int(args.LATENT_DIM_2) BATCH_SIZE = int(args.BATCH_SIZE) LEARNING_RATE = float(args.LEARNING_RATE) ALPHA = float(args.ALPHA) # train and evaluate RNN multi-step network with given values of hyperaparameters run_training(energy, T, LATENT_DIM_1, LATENT_DIM_2, BATCH_SIZE, LEARNING_RATE, ALPHA)
def generate_statistics(path):
    all_wav_files_ordered = []
    wav_file_sizes = []
    text_transcriptions = []
    speakers = []
    sexes = []
    births = []
    youths = []
    cnt = 0
    counter_short_long = 0

    for path, subdirs, files in os.walk(path):
        for name in files:
            if not name.endswith('spl'):
                continue
            spl_path = os.path.join(path, name)
            speaker, name, sex, region_of_birth, region_of_youth, wavs_and_transcriptions = extract_data(spl_path)
            wav_path = spl_path.replace('.spl', '/').replace('data', 'speech')
            for key in wavs_and_transcriptions:
                final_path = f'{wav_path}{key}'
                if not os.path.isfile(final_path):
                    continue
                f = sf.SoundFile(final_path)
                duration = len(f) / f.samplerate
                # if duration < 2:
                #     counter_short_long += 1
                #     continue
                cnt += 1
                if cnt % 1000 == 0:
                    print(cnt)
                all_wav_files_ordered.append(final_path)
                wav_file_sizes.append(os.path.getsize(final_path))
                speakers.append(speaker)
                lower_text = wavs_and_transcriptions[key].lower() \
                    .replace(',', ' ').replace('è', "e").replace('é', "e") \
                    .replace("ÿ", "y").replace("ü", "u")
                for c in string.punctuation:
                    lower_text = lower_text.replace(c, " ")
                text_transcriptions.append(lower_text)
                sexes.append(sex)
                births.append(region_of_birth)
                youths.append(region_of_youth)

    group_sexes = Counter(sexes)
    group_births = Counter(births)
    group_youths = Counter(youths)
    group_speakers = Counter(speakers)

    print('=========SEXES==========')
    print(group_sexes.values())
    print(group_sexes.keys())
    print('===================')
    print('=========births==========')
    print(group_births.values())
    print(group_births.keys())
    print('===================')
    print('=========youths==========')
    print(group_youths.values())
    print(group_youths.keys())
    print('===================')
    print('=========speakers==========')
    print(len(group_speakers.values()))
    print(group_speakers.values())
    print(group_speakers.keys())
    print('===================')
    print(counter_short_long)
import numpy as np
from extract_data import extract_data


def floyd(dis_map):
    vertices_num = len(dis_map)
    for k in range(1, vertices_num):
        for i in range(1, vertices_num):
            for j in range(1, vertices_num):
                temp = dis_map[i][k] + dis_map[k][j]
                if dis_map[i][j] > temp:
                    dis_map[i][j] = temp
    for i in range(vertices_num):
        dis_map[i][i] = 0
    return dis_map


def get_information(filename):
    info, req_edges, dis_map = extract_data(filename)
    dis_map = floyd(dis_map)
    return info, req_edges, dis_map


if __name__ == '__main__':
    filename = 'C:\FILES and WORKS\学习\大三上\AI\AILAB\CARP\CARPResource\Proj2_Carp\CARP_samples\gdb1.dat'
    info, req_edges, dis_map = extract_data(filename)
    # print(dis_map)
    # print()
    dis_map = floyd(dis_map)
    print(dis_map)
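# A small self-contained check (not from the original source): the CARP .dat path
# above is machine-specific, so this sketch feeds floyd() a tiny hand-made distance
# matrix instead. Vertices are labelled 1..3; row/column 0 is unused padding,
# matching how the function skips index 0.
INF = float('inf')
toy_map = [
    [INF, INF, INF, INF],   # index 0 is padding
    [INF, 0,   1,   4],
    [INF, 1,   0,   2],
    [INF, 4,   2,   0],
]
print(floyd(toy_map)[1][3])  # expected 3: the 1 -> 2 -> 3 path beats the direct edge of 4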
def generate_statistics_from_file(path, wavs):
    wav_paths_read = []
    speakers = []
    sexes = []
    births = []
    youths = []
    cnt = 0

    for path, subdirs, files in os.walk(path):
        for name in files:
            if not name.endswith('spl'):
                continue
            spl_path = os.path.join(path, name)
            speaker, name, sex, region_of_birth, region_of_youth, wavs_and_transcriptions = extract_data(spl_path)
            wav_path = spl_path.replace('.spl', '/').replace('data', 'speech')
            for key in wavs_and_transcriptions:
                final_path = f'{wav_path}{key}'
                # if final_path not in wavs:
                #     continue
                cnt += 1
                if cnt % 100000 == 0:
                    print(cnt)
                speakers.append(speaker)
                wav_paths_read.append(final_path)
                sexes.append(sex)
                births.append(region_of_birth)
                youths.append(region_of_youth)

    indices = []
    for path in wavs:
        indices.append(wav_paths_read.index(path))
    # indices = int(indices)

    group_sexes = Counter([sexes[i] for i in indices])
    group_births = Counter([births[i] for i in indices])
    group_youths = Counter([youths[i] for i in indices])
    group_speakers = Counter([speakers[i] for i in indices])

    print('=========SEXES==========')
    print(group_sexes.values())
    print(group_sexes.keys())
    print('===================')
    print('=========births==========')
    print(group_births.values())
    print(group_births.keys())
    print('===================')
    print('=========youths==========')
    print(group_youths.values())
    print(group_youths.keys())
    print('===================')
    print('=========speakers==========')
    print(group_speakers.values())
    print(group_speakers.keys())
    print('===================')
def run_interrupt(ifile):
    """
    run all sci run plot routines
    input:  ifile   --- input file name. if it is not given, the script will ask
    output: <plot_dir>/*.png    --- ace data plot
            <ephin_dir>/*.png   --- ephin data plot
            <goes_dir>/*.png    --- goes data plot
            <xmm_dir>/*.png     --- xmm data plot
            <html_dir>/*.html   --- html page for that interruption
            <web_dir>/rad_interrupt.html    --- main page
    """
#
#--- check whether the input file exists; if not, ask
#
    test = exc_dir + ifile
    if not os.path.isfile(test):
        ifile = input('Please put the interrupt timing list: ')
#
#--- extract data
#
    print("Extracting Data")
    edata.extract_data(ifile)

    data = mcf.read_data_file(ifile)

    for ent in data:
        atemp = re.split('\s+|\t+', ent)
        event = atemp[0]
        start = atemp[1]
        stop = atemp[2]
        gap = atemp[3]
        itype = atemp[4]

        print("PLOTTING: " + str(event))
#
#--- plot Ephin data
#
        print("EPHIN/HRC")
        ephin.plot_ephin_main(event, start, stop)
#
#---- plot GOES data
#
        print("GOES")
        goes.plot_goes_main(event, start, stop)
#
#---- plot other radiation data (from NOAA)
#
        print("NOAA")
        ace.start_ace_plot(event, start, stop)
#
#---- extract and plot XMM data
#
        print("XMM")
        xmm.read_xmm_and_process(event)
#
#---- create individual html page
#
        print("HTML UPDATE")
        srphtml.print_each_html_control(ifile)
#
#---- update main html page
#
    srphtml.print_each_html_control()