Example #1
def main():
    if len(sys.argv) != 2:
        print "Incorrect usage. Use as follows:"
        print "python main.py <STOCK_NAME>"
        return
    stock = sys.argv[1].lower()
    print "=" * 100
    print "FETCHING DATA FOR STOCK: " + stock
    print "=" * 100
    data = ed.extract_data(stock)
    print
    print

    print "=" * 100
    print "GENERATING AUDIO FILE"
    print "=" * 100
    beat_box.generate(stock, data)
    print
    print

    print "=" * 100
    print "PUSHING TO GITHUB"
    print "=" * 100
    github_sync.sync_with_github()
    print
    print

    print "=" * 100
    print "DONE :)"
    print "=" * 100
Example #2
def main():
	if len(sys.argv) != 2:
		print "Incorrect usage. Use as follows:"
		print "python main.py <STOCK_NAME>"
		return
	stock = sys.argv[1].lower()
	print "="*100
	print "FETCHING DATA FOR STOCK: " + stock
	print "="*100
	data = ed.extract_data(stock)
	print
	print

	print "="*100
	print "GENERATING AUDIO FILE"
	print "="*100
	beat_box.generate(stock, data)
	print
	print

	print "="*100
	print "PUSHING TO GITHUB"
	print "="*100
	github_sync.sync_with_github()
	print
	print

	print "="*100
	print "DONE :)"
	print "="*100
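For reference, a minimal Python 3 sketch of the same entry point; it assumes the ed (extract_data), beat_box, and github_sync modules used above are importable under those names, and it factors the repeated "=" banners into a small helper.

import sys

import extract_data as ed  # assumed module name behind the 'ed' alias above
import beat_box
import github_sync


def banner(text):
    # print a section header like the "=" * 100 banners in the example
    print("=" * 100)
    print(text)
    print("=" * 100)


def main():
    if len(sys.argv) != 2:
        print("Incorrect usage. Use as follows:")
        print("python main.py <STOCK_NAME>")
        return
    stock = sys.argv[1].lower()

    banner("FETCHING DATA FOR STOCK: " + stock)
    data = ed.extract_data(stock)

    banner("GENERATING AUDIO FILE")
    beat_box.generate(stock, data)

    banner("PUSHING TO GITHUB")
    github_sync.sync_with_github()

    banner("DONE :)")


if __name__ == "__main__":
    main()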
Example #3
def proj_length():        
    session['length'] = int(request.form['length']) #length in periods
    session['length_years'] = int(math.ceil(session['length']/float(session['periods'])))

    if session['subnational'] == False:
        data, session['pop'] = ed.extract_data(session['iso3'], start=START)
        for k in ['decline', 'detected_mdr', 'total_mdr', 'p_ret_rel', 'p_new', 
                'prob_mdr_new', 'prob_mdr_re', 'prev_100_k']:
            session[k] = data[k]
    else:
        session['pop'] = {y: 0 for y in range(START, 2031)}
        for k in ['decline', 'detected_mdr', 'total_mdr', 'p_ret_rel', 'p_new', 
                'prob_mdr_new', 'prob_mdr_re', 'prev_100_k']:
            session[k] = None 

    # years for which we need population data
    pop_years = range(session['hist_years'][0], session['hist_years'][-1]+
            session['length_years'] + 1)
    # divide years in 2 columns for layout reasons
    pop_years_1 = pop_years[:len(pop_years)/2 + 1]
    pop_years_2 = pop_years[len(pop_years)/2 + 1:]

    # save as all_years to be consistent with non-manual sessions
    session['all_years'] = pop_years

    return render_template("pop_estimates.html", pop_years_1=pop_years_1, 
            pop_years_2=pop_years_2, pop_dict=session['pop'],
            region=session['subnational'])
Example #4
def data_country():
    
    # session using WHO data (not manually inserted)
    session['manual'] = False
    session['subnational'] = False
    # WHO data is yearly (1 period per year)
    session['periods'] = 1

    session['country'] = request.form['country']
    with open('tbprojections/country_names.pickle', 'r') as f:
        names = cPickle.load(f) 

    for (k, v) in names.items():
        if v == session['country']:
            session['iso3'] = k

    # if the user selected a country for which enough data is available
    if ut._verify_ISO3(session['iso3']):
        # Extract the data
        #=================
        #'data' is a dictionary (see extract_data.py)
        #'pop' is a dictionary containing UN estimates for population
        # Inside 'data'
        #'p_new': proportion new cases among all
        #'p_ret_rel': proportion retreat - relapse cases among all
        #'prob_mdr_new': probability new case having MDR
        #'prob_mdr_re': probability retreat - relapse case having MDR
        #'decline': estimated decline in incidence
        #'prev_100_k': estimated prevalence for 100k population
        #'detected_mdr': detected MDR cases in last year
        #'total_mdr': estimated MDR cases in last year
 
        session['all_data'], session['pop'] = ed.extract_data(session['iso3'], start=START)  
        #convert keys to integers (issue with storing in 'session')
        session['pop'] = {int(k): int(v) for (k, v) in session['pop'].items()}

        # Projection length
        # =================
        session['length'] = int(request.form['length']) #in periods
        session['length_years'] = session['length'] #in years
        
        # Years for which we need population data are all_years (history +
        # projection years)
        act_year = session['all_data']['years'][-1][0]
        first_year = max(START, session['all_data']['years'][0][0])
        session['all_years'] = range(first_year, act_year+session['length']+1)

        # Divide years in 2 columns for layout reasons
        pop_years_1 = session['all_years'][:len(session['all_years'])/2 + 1]
        pop_years_2 = session['all_years'][len(session['all_years'])/2 + 1:]
        
        # Display following page
        return render_template("pop_estimates.html", 
                pop_years_1=pop_years_1,  pop_years_2=pop_years_2, 
                region=False, pop_dict=session['pop'])
    
    # if the user selected an invalid country ...
    else: 
        return render_template("error.html", error=e.error_country())
Example #5
def index():
    form = AvatarForm()
    if request.method == 'GET':
        return render_template('index.html', form=form)

    url = request.form['url']
    # img = request.form['img']
    return extract_data(url)
Example #6
def contact(phone_number, country):
    country_data = extract_data()[country]
    try:
        result = sendSMS(phone_number, country, country_data)
        print(result)
        response = jsonify("Success!")
    except:
        response = jsonify(
            "Failed to send message - SMS API may be overloaded")

    response.headers.add("Access-Control-Allow-Origin", "*")
    return response
def price_watch(url):
    product_name, product_price, currency, avail = extract_data(url)
    timestamp = str(datetime.now())
    json_dict = {
        "Product name": product_name,
        "Product price": product_price,
        "Currency": currency,
        "Availability": avail,
        "URL": url,
        "Timestamp": timestamp
    }
    print(f"{product_name:50}{product_price:8} {currency:5}({avail})")
    return json_dict
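A possible way to use price_watch in a polling loop, assuming it is defined as above; the watch_urls list and the output path are placeholders, and each result is appended as one JSON Lines record.

import json

# hypothetical list of product pages to track
watch_urls = [
    "https://example.com/product/123",
]


def log_prices(urls, out_path="price_log.jsonl"):
    # call price_watch for each URL and append one JSON record per line
    with open(out_path, "a", encoding="utf-8") as f:
        for url in urls:
            record = price_watch(url)
            f.write(json.dumps(record, ensure_ascii=False) + "\n")


# log_prices(watch_urls)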
Example #8
def train_model():
    extract_data.extract_data()

    print "Saving a train LMDB into the {0} directory".format(TRAIN_LMDB)
    create_lmdb(TRAIN_FILE, EXTRACT_DIR, TRAIN_LMDB)

    print "Saving a test LMDB into the {0} directory".format(TEST_LMDB)
    create_lmdb(TEST_FILE, EXTRACT_DIR, TEST_LMDB)

    print "Creating a train prototxt file with batch size {0} into {1}".format(
        TRAIN_BATCH_SIZE, TRAIN_LMDB)
    with open(TRAIN_PROTOTXT, 'w') as f:
        f.write(str(snooker_net(TRAIN_LMDB, TRAIN_BATCH_SIZE)))

    print "Creating a test prototxt file with batch size {0} into {1}".format(
        TEST_BATCH_SIZE, TEST_LMDB)
    with open(TEST_PROTOTXT, 'w') as f:
        f.write(str(snooker_net(TEST_LMDB, TEST_BATCH_SIZE)))

    print "Creating a solver prototxt into the {0} file".format(
        SOLVER_PROTOTXT)
    with open(SOLVER_PROTOTXT, 'w') as f:
        f.write(str(create_solver(TRAIN_PROTOTXT, TEST_PROTOTXT)))

    print "Training the model"
    results = train(SOLVER_PROTOTXT, METHOD)

    print "Saving the trained model to {0} and {1}".format(
        MODEL_OUTPUT, MODEL_PTX)
    results.net.save(MODEL_OUTPUT)
    with open(MODEL_PTX, 'w') as f:
        f.write(
            str(snooker_net(TRAIN_LMDB, TRAIN_BATCH_SIZE,
                            output_type="Deploy")))

    print "Saving accuracy chart to {0}".format(ACCURACY_CHART)
    save_accuracy(results, ACCURACY_CHART)
Example #9
def graph_eval(dataset_loc, input_graph_def, graph, input_node, output_node,
               batchsize):

    input_graph_def.ParseFromString(tf.gfile.GFile(graph, "rb").read())

    # Extract data from the csv files again
    training_dataset_filepath = '%ssign_mnist_train/sign_mnist_train.csv' % dataset_loc
    testing_dataset_filepath = '%ssign_mnist_test/sign_mnist_test.csv' % dataset_loc
    train_data, train_label, val_data, val_label, testing_data, testing_label = extract_data(
        training_dataset_filepath, testing_dataset_filepath, 0)

    total_batches = int(len(testing_data) / batchsize)

    #Import Graph
    tf.import_graph_def(input_graph_def, name='')

    # Get input placeholders & tensors
    images_in = tf.get_default_graph().get_tensor_by_name(input_node + ':0')
    labels = tf.placeholder(tf.int32, shape=[None, 25])

    # get output tensors
    logits = tf.get_default_graph().get_tensor_by_name(output_node + ':0')
    predicted_logit = tf.argmax(input=logits, axis=1, output_type=tf.int32)
    ground_truth_label = tf.argmax(labels, 1, output_type=tf.int32)

    # Define the metric and update operations
    tf_metric, tf_metric_update = tf.metrics.accuracy(
        labels=ground_truth_label, predictions=predicted_logit, name='acc')

    with tf.Session() as sess:
        progress = ProgressBar()

        sess.run(tf.initializers.global_variables())
        sess.run(tf.initializers.local_variables())

        # process all batches
        for i in progress(range(0, total_batches)):

            # fetch a batch from validation dataset
            x_batch, y_batch = testing_data[i*batchsize:i*batchsize+batchsize], \
                               testing_label[i*batchsize:i*batchsize+batchsize]

            # Run graph for accuracy node
            feed_dict = {images_in: x_batch, labels: y_batch}
            acc = sess.run(tf_metric_update, feed_dict)

        print('Graph accuracy with validation dataset: {:1.4f}'.format(acc))

    return
Example #10
def train_nn(dataset_loc, train_bool, num_test):

    if (train_bool):
        #Set Parameters
        DATASET_SIZE = 27455
        BATCHSIZE = 32
        EPOCHS = 10
        LEARN_RATE = 0.0002
        DECAY_RATE = 3e-6
        NUM_IMAGES = 10
        #Pre-processes data and trains the neural network

        #Get the column names from the first row of the csv file
        training_dataset_filepath = '%ssign_mnist_train/sign_mnist_train.csv' % dataset_loc
        testing_dataset_filepath = '%ssign_mnist_test/sign_mnist_test.csv' % dataset_loc

        train_data, train_label, val_data, val_label, testing_data, testing_label = extract_data(
            training_dataset_filepath, testing_dataset_filepath, num_test)

        model = neural_network()

        model.compile(loss='categorical_crossentropy',
                      optimizer=optimizers.Adam(lr=LEARN_RATE,
                                                decay=DECAY_RATE),
                      metrics=['accuracy'])

        model.fit(train_data,
                  train_label,
                  batch_size=BATCHSIZE,
                  shuffle=True,
                  epochs=EPOCHS,
                  validation_data=(val_data, val_label))

        #Evaluate Model Accuracy
        scores = model.evaluate(testing_data,
                                testing_label,
                                batch_size=BATCHSIZE)

        print('Loss: %.3f' % scores[0])
        print('Accuracy: %.3f' % scores[1])

        # save weights, model architecture & optimizer to an HDF5 format file
        model.save(os.path.join('./train', 'keras_trained_model.h5'))
        print('Finished Training')

    print('Convert Keras to TF')
    keras2tf('train/keras_trained_model.h5', 'train/tfchkpt.ckpt', 'train')
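A hypothetical invocation of train_nn, assuming the Sign Language MNIST CSVs sit under dataset_loc exactly as the format strings above expect; the paths and num_test value are placeholders.

# dataset_loc is used as a prefix, so the CSVs are expected at
# <dataset_loc>sign_mnist_train/sign_mnist_train.csv and
# <dataset_loc>sign_mnist_test/sign_mnist_test.csv
if __name__ == "__main__":
    train_nn(dataset_loc="./dataset/", train_bool=True, num_test=10)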
Example #11
    def generateReport(self):

        popup = QMessageBox()
        popup.setIcon(QMessageBox.Critical)
        popup.setWindowTitle("RBA Generator")

        if self.logFile == None:
            popup.setText("Select a file")
            popup.exec()
            return

        if self.puList.currentIndex() == 0:
            popup.setText("Select valid PU")
            popup.exec()
            return

        if self.duList.currentIndex() == 0:
            popup.setText("Select valid DU")
            popup.exec()
            return

        if self.accountsList.currentIndex() == 0:
            popup.setText("Select valid Account")
            popup.exec()
            return

        #call function to generate report of the file and get the file name
        name_list = [
            self.puList.currentText(),
            self.duList.currentText(),
            self.accountsList.currentText()
        ]
        Ui_MainWindow.pu_du_list = name_list
        self.finalFileName = extract_filename.extract_filename(self.logFile)
        self.finalFileName = self.finalFileName + "_utilization_report_"
        self.finalFileName = extract_data.extract_data(name_list, self.logFile,
                                                       self.finalFileName)
        Ui_MainWindow.globalFilename = self.finalFileName
        self.reportFileName.setText(self.finalFileName)

        self.downloadReport.setEnabled(True)
        self.emailReport.setEnabled(True)
        self.downloadButton.setEnabled(True)
def train_nn(dataset_loc, train_bool, num_test):
    if (train_bool):
        DATASET_SIZE = 27455
        BATCHSIZE = 32
        EPOCHS = 3
        LEARN_RATE = 0.0001
        DECAY_RATE = 1e-6
        NUM_IMAGES = 10

        training_dataset_filepath = '%smnist/sign_mnist_train/sign_mnist_train.csv' % dataset_loc
        testing_dataset_filepath = '%smnist/sign_mnist_test/sign_mnist_test.csv' % dataset_loc

        train_data, train_label, val_data, val_label, testing_data, testing_label = extract_data(
            training_dataset_filepath, testing_dataset_filepath, num_test)

        model = neural_network()

        model.compile(loss='categorical_crossentropy',
                      optimizer=optimizers.Adam(lr=LEARN_RATE,
                                                decay=DECAY_RATE),
                      metrics=['accuracy'])

        model.fit(train_data,
                  train_label,
                  batch_size=BATCHSIZE,
                  shuffle=True,
                  epochs=EPOCHS,
                  validation_data=(val_data, val_label))

        scores = model.evaluate(testing_data,
                                testing_label,
                                batch_size=BATCHSIZE)

        print('Loss: %.3f' % scores[0])
        print('Accuracy: %.3f' % scores[1])

        model.save(os.path.join('./train', 'keras_trained_model.h5'))
        print('Finished Training')

    print('Convert Keras to TF')
    keras2tf('train/keras_trained_model.h5', 'train/tfchkpt.ckpt', 'train')
Example #13
def classify():
    model, total_wise, total_future = train_model()
    test_data, test_labels = extract_data(data_filename, label_filename)
    results = []
    correct_counter = 0
    for i, sentence in enumerate(test_data):
        array_sentence = sentence.split(' ')
        wise_probability, future_probability = compute_sentence_probability(
            model, total_wise, total_future, array_sentence)
        class_prediction = predict_class(wise_probability, future_probability)
        results.append({
            "sentence": sentence,
            "prediction": class_prediction,
            "actual": test_labels[i]
        })
        if (class_prediction == int(test_labels[i])):
            correct_counter += 1
    for result in results:
        print(result)
    print("Accuracy: {0:.2f}%".format(
        (correct_counter / len(test_labels)) * 100))
def lambda_handler(event, context):
    """
    Parameters
    ----------
    event: dict, required

        keys:
            environment: str in {'production', 'testing'} determines which download URLs to use

    context: object, required
        Lambda Context runtime methods and attributes

    Returns
    ------
    Invocation is Asynchronous
    """

    env = event["environment"]

    ny_times_data, jh_data, prev_data = extract_data(
        env, BUCKET_NAME, KEY, s3)

    transformed_data, new_records, updated_records = transform_data(
        ny_times_data, jh_data, prev_data)

    load_data(env, BUCKET_NAME, KEY, CHANGE_LOG,
              transformed_data, new_records, updated_records, s3)

    if prev_data is None:
        return {
            "Status": "New Data Loaded",
            "New Records": str(len(new_records)),
            "Updated Records": "--"
        }

    return {
        "Status": "Daily Data Updated",
        "New Records": str(len(new_records)),
        "Updated Records": str(len(updated_records))
    }
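A small local-invocation sketch for lambda_handler, assuming the module-level BUCKET_NAME, KEY, CHANGE_LOG, and s3 client are configured; the event payload only carries the documented 'environment' key, and the Lambda context is not used here.

if __name__ == "__main__":
    # 'testing' selects the test download URLs per the docstring above
    sample_event = {"environment": "testing"}
    result = lambda_handler(sample_event, context=None)
    print(result)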
def histogram_bounds():
    #Comment out/delete the feature you want to remove
    x, y = ed.extract_data()
    x1 = np.zeros((len(x), 1))
    x2 = np.zeros((len(x), 1))
    x3 = np.zeros((len(x), 1))
    x4 = np.zeros((len(x), 1))
    for i in range(len(x)):
        x1[i] = x[i][0]
        x2[i] = x[i][1]
        x3[i] = x[i][2]
        x4[i] = x[i][3]

    min_x1, max_x1 = mf.find_smallest_and_biggest_value(x1)
    min_x2, max_x2 = mf.find_smallest_and_biggest_value(x2)
    min_x3, max_x3 = mf.find_smallest_and_biggest_value(x3)
    min_x4, max_x4 = mf.find_smallest_and_biggest_value(x4)

    min_list = [min_x1, min_x2, min_x3, min_x4]
    max_list = [max_x1, max_x2, max_x3, max_x4]

    return min_list, max_list
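The same per-feature bounds can be computed with numpy reductions instead of the hand-unrolled columns; a minimal sketch, assuming ed.extract_data() returns x as an array-like of shape (n_samples, 4).

import numpy as np


def histogram_bounds_np():
    # column-wise minima and maxima replace the four unrolled feature arrays
    x, y = ed.extract_data()
    x = np.asarray(x, dtype=float)
    min_list = x.min(axis=0).tolist()
    max_list = x.max(axis=0).tolist()
    return min_list, max_list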
Example #16
def generate_full_csv(path, write_file):
    all_wav_files_ordered = []
    wav_file_sizes = []
    text_transcriptions = []
    speakers = []
    sexes = []
    births = []
    youths = []

    cnt = 0
    counter_short_long = 0
    for path, subdirs, files in os.walk(path):
        for name in files:

            # if len(all_wav_files_ordered) > 20000:
            #     break

            if not name.endswith('spl'):
                continue

            spl_path = os.path.join(path, name)
            speaker, name, sex, region_of_birth, region_of_youth, wavs_and_transcriptions = extract_data(spl_path)

            if not sex:
                continue

            wav_path = spl_path.replace('.spl', '/').replace('data', 'speech')

            for key in wavs_and_transcriptions:

                final_path = f'{wav_path}{key}'

                if not os.path.isfile(final_path):
                    continue

                if final_path in excluded_train_wavs:
                    print(final_path)
                    continue


                # f = sf.SoundFile(final_path)
                # duration = len(f) / f.samplerate

                # if duration < 4:
                #     counter_short_long+=1
                #     continue

                cnt += 1
                if cnt % 10000 == 0:
                    print(cnt)
                # when training, these samples produce inf in the loss
                #if cnt >= 60000 and cnt <= 70000:
                #    print(cnt)
                #    continue

                #if cnt >= 110000 and cnt <= 150000:
                #     print(cnt)
                #     continue
                # when testing, these samples produce inf in the loss
                if cnt > 15000 and cnt < 19000:
                    continue

                lower_text = wavs_and_transcriptions[key].lower() \
                    .replace(',', ' ').replace('è', "e").replace('é', "e").replace("ÿ", "y").replace("ü", "u").replace(
                    'î', 'i')

                for c in string.punctuation:
                    lower_text = lower_text.replace(c, " ")

                #features = audiofile_to_input_vector(final_path, numcep, numcontext)
                #features_len = len(features) - 2 * numcontext
                #transcript = text_to_char_array(lower_text, alphabet)

                #if features_len < len(transcript):
                #    counter_short_long += 1
                #    print(final_path)
                #    continue

                speakers.append(speaker)

                all_wav_files_ordered.append(final_path)
                wav_file_sizes.append(os.path.getsize(final_path))

                text_transcriptions.append(lower_text)
                sexes.append(sex)
                births.append(region_of_birth)
                youths.append(region_of_youth)

    group_births = Counter(births)


    # num_test_data = int(test_percent * len(all_wav_files_ordered))

    train_wav_indices = []
    dev_wav_indices = []

    for region in group_births.keys():
        # dict_with_region_indices[region] = np.where(group_births == region)[0]
        indices = np.where(np.array(births) == region)[0]

        #if 'Stockholm' in region:
        #    indices = indices[0:20000]
        #elif 'Göteborg' in region:
        #    indices = indices[0:10000]
        #else:
        indices = indices[0:5000]

        num_train_data = int(training_percent * len(indices))

        train_indices = np.array(indices[0:num_train_data])
        dev_indices = np.array(indices[num_train_data:])

        train_wav_indices.extend([i for i in train_indices])
        dev_wav_indices.extend([i for i in dev_indices])


    train_wav_files = [all_wav_files_ordered[i] for i in train_wav_indices]
    dev_wav_files = [all_wav_files_ordered[i] for i in dev_wav_indices]
    # test_wav_files = all_wav_files_ordered[num_train_data + num_dev_data:]
    #
    train_wav_file_sizes = [wav_file_sizes[i] for i in train_wav_indices]
    dev_wav_file_sizes = [wav_file_sizes[i] for i in dev_wav_indices]
    # test_wav_file_sizes = wav_file_sizes[num_train_data + num_dev_data:]
    #
    train_transcriptions_files = [text_transcriptions[i] for i in train_wav_indices]
    dev_transcriptions_files = [text_transcriptions[i] for i in dev_wav_indices]
    # test_transcriptions_files = text_transcriptions[num_train_data + num_dev_data:]

    train_dict = {"wav_filename": train_wav_files,
                  "wav_filesize": train_wav_file_sizes,
                  "transcript": train_transcriptions_files}

    val_dict = {"wav_filename": dev_wav_files,
                  "wav_filesize": dev_wav_file_sizes,
                  "transcript": dev_transcriptions_files}

    # test_dict = {"wav_filename": test_wav_files,
    #               "wav_filesize": test_wav_file_sizes,
    #               "transcript": test_transcriptions_files}
    #
    #write_to_csv(filename='/home/guest/Desktop/DeepSpeech/data/TRAIN/train.csv', dictionary=train_dict)

    #write_to_csv(filename='/home/guest/Desktop/DeepSpeech/data/DEV/dev.csv', dictionary=val_dict)
    #
    # write_to_csv(filename='/home/guest/Desktop/DeepSpeech/data/TEST/test.csv', dictionary=test_dict)

    # unique_transcr = set(text_transcriptions)
    # list_ = list(unique_transcr)
    #
    # print('text : ',len(text_transcriptions))
    # print('unique : ',len(list_))
    #
    # final_dict = {"transcript": list_}
    # write_to_csv_2(filename='/home/guest/Desktop/Dataset16/sve.16khz.0467-1/0467_sv_train_1/stah2.csv', dictionary=final_dict)

    final_dict = {"wav_filename": all_wav_files_ordered,
                  "wav_filesize": wav_file_sizes,
                  "transcript": text_transcriptions}

    write_to_csv(filename=write_file, dictionary=train_dict)


    print('============ALL=============')


    group_sexes = Counter(sexes)
    group_births = Counter(births)
    group_youths = Counter(youths)

    print('=========SEXES==========')
    for sex in group_sexes.keys():
        print("key : " + sex + 'with value : ' + str(group_sexes[sex]))
    # print(group_sexes.values())
    # print(group_sexes.keys())
    print('===================')

    print('=========births==========')
    for sex in group_births.keys():
        print("key : " + sex + 'with value : ' + str(group_births[sex]))
    # print(group_births.values())
    # print(group_births.keys())
    print('===================')

    print('=========youths==========')
    for sex in group_youths.keys():
        print("key : " + sex + 'with value : ' + str(group_youths[sex]))
    # print(group_births.values())
    # print(group_youths.values())
    # print(group_youths.keys())
    print('===================')




    print(counter_short_long)
    print('============TRAIN=============')

    group_sexes = Counter([sexes[i] for i in train_wav_indices])
    group_births = Counter([births[i] for i in train_wav_indices])
    group_youths = Counter([youths[i] for i in train_wav_indices])

    print('=========SEXES==========')
    for sex in group_sexes.keys():
        print("key : " + sex + 'with value : ' + str(group_sexes[sex]))
    # print(group_sexes.values())
    # print(group_sexes.keys())
    print('===================')

    print('=========births==========')
    for sex in group_births.keys():
        print("key : " + sex + 'with value : ' + str(group_births[sex]))
    # print(group_births.values())
    # print(group_births.keys())
    print('===================')

    print('=========youths==========')
    for sex in group_youths.keys():
        print("key : " + sex + 'with value : ' + str(group_youths[sex]))
    # print(group_births.values())
    # print(group_youths.values())
    # print(group_youths.keys())
    print('===================')


    print('============DEV=============')

    group_sexes = Counter([sexes[i] for i in dev_wav_indices])
    group_births = Counter([births[i] for i in dev_wav_indices])
    group_youths = Counter([youths[i] for i in dev_wav_indices])

    print('=========SEXES==========')
    for sex in group_sexes.keys():
        print("key : " + sex + 'with value : ' + str(group_sexes[sex]))
    # print(group_sexes.values())
    # print(group_sexes.keys())
    print('===================')

    print('=========births==========')
    for sex in group_births.keys():
        print("key : " + sex + 'with value : ' + str(group_births[sex]))
    # print(group_births.values())
    # print(group_births.keys())
    print('===================')

    print('=========youths==========')
    for sex in group_youths.keys():
        print("key : " + sex + 'with value : ' + str(group_youths[sex]))
    # print(group_births.values())
    # print(group_youths.values())
    # print(group_youths.keys())
    print('===================')
Example #17
def data(filename=None):
    response = jsonify(extract_data())
    response.headers.add("Access-Control-Allow-Origin", "*")
    return response
Example #18
  a. Multiple recordings plus comments; pick the first one?
  b. Whether it is blank
'''

import json
from phrasing_url import phrasing_url
from extract_data import extract_data
from download import download

initial = "https://tft.rocks/topic/"
i = 1
tft_list = []
list_issue = [216, 471, 491, 655, 656, 657, 658]
while i < 659:
    if i in list_issue:
        url_issue = initial + str(i)
        dic = {"refs": i, "url": url_issue}
    else:
        html = phrasing_url(initial, i)
        dic = extract_data(html, i)
        print(dic)
        tft_list.append(dic)
    i = i + 1

#print(data)
#print(dic)
with open('tftrocks.txt', 'w', encoding='utf-8') as f:
    #f.write(json.dumps(dic,ensure_ascii=False,indent=2))
    json.dump(tft_list, f, ensure_ascii=False, indent=2)
    #f.write(json.dumps(i))
Example #19
def run_interrupt(file):
    """
    run all sci run plot routines
    input:  file                --- input file name. if it is not given, the script will ask
    output: <plot_dir>/*.png    --- ace data plot
            <ephin_dir>/*.png   --- ephin data plot
            <goes_dir>/*.png    --- goes data plot
            <xmm_dir>/*.png     --- xmm data plot
            <html_dir>/*.html   --- html page for that interruption
            <web_dir>/rad_interrupt.html    --- main page
    """
    #
    #--- check input file exist, if not ask
    #
    test = exc_dir + file
    if not os.path.isfile(test):
        file = raw_input('Please provide the interrupt timing list: ')

    if file == 'test':
        #
        #--- if this is a test case, prepare for the test
        #
        comp_test = 'test'
        file = test_web_dir + 'test_date'
    else:
        comp_test = 'NA'
#
#--- extract data
#
    print "Extracting Data"
    edata.extract_data(file)

    f = open(file, 'r')
    data = [line.strip() for line in f.readlines()]
    f.close()

    for ent in data:
        atemp = re.split('\s+|\t+', ent)
        event = atemp[0]
        start = atemp[1]
        stop = atemp[2]
        gap = atemp[3]
        type = atemp[4]

        print "PLOTTING: " + str(event)
        #
        #--- plot Ephin data
        #
        print "EPHIN"
        ephin.plotEphinMain(event, start, stop, comp_test)
        #
        #---- plot GOES data
        #
        print "GOES"
        goes.plotGOESMain(event, start, stop, comp_test)
        #
        #---- plot other radiation data (from NOAA)
        #
        print "NOAA"
        noaa.startACEPlot(event, start, stop, comp_test)
        #
        #---- extract and plot XMM data
        #
        print "XMM"
        xmm.read_xmm_and_process(event)

#
#---- create individual html page
#
    print "HTML UPDATE"
    srphtml.printEachHtmlControl(file)
    #
    #---- update main html page
    #
    srphtml.printEachHtmlControl()
Example #20
def chisquareintf(root,
                  name,
                  temp,
                  dist,
                  powlist,
                  denlist,
                  inclist,
                  verbose=0):

    cspdata = np.zeros((len(powlist), len(denlist)))
    bestinc = np.zeros((len(powlist), len(denlist)))

    # loop through densities & power law indexes
    for r in np.arange(0, len(powlist)):
        for c in np.arange(0, len(denlist)):

            incchi = []

            currbestchisq = 1000000  # arbitrary, just has to be very large
            currbestpow = 100  # arbitrary
            currbestden = 100  # arbitrary
            currbestinc = 100  # arbitrary

            for k in np.arange(0, len(inclist)):
                imagefile = open(
                    root + 'IMAGE.' + name + '_t' + temp + '_w0d0_j0h0_' +
                    powlist[r] + '_' + denlist[c] + '_rd12p0_cont_alp0p0_i' +
                    inclist[k], 'r')

                templine = imagefile.readline()
                # get dimensions
                width = int(templine[18:23])
                height = int(templine[26:])

                imagefile.readline()
                imagefile.readline()

                image = np.zeros((height, width))

                for line in imagefile:
                    w = int(line[0:5])
                    h = int(line[6:10])

                    intensity = line[65:75]

                    image[h - 1][w - 1] = intensity

                imagefile.close()

                # norm to 1
                image = image / np.sum(image)
                # chi square statistic

                realdata, imagdata = ext.extract_data(image,
                                                      u,
                                                      v,
                                                      im_scale=0.007,
                                                      cubic=1,
                                                      nohan=1)
                modelvis2 = realdata**2 + imagdata**2
                chisq = 1 / float(np.shape(u)[0] - 3) * sum(
                    (vis2data - modelvis2)**2 / (0.05 * vis2data)**2)
                incchi.append(chisq)

                # pick best inclination
                if np.abs(chisq - 1) < np.abs(currbestchisq - 1):
                    currbestchisq = chisq
                    currbestpow = powlist[r]
                    currbestden = denlist[c]
                    currbestinc = inclist[k]

            if verbose == 1:
                print powlist[r], denlist[c], '\n', incchi, '\n'

            # enter chi square and best inclination
            cspdata[r, c] = np.min(incchi)
            bestinc[r, c] = np.float(currbestinc)

    print "\nchi square grid"
    print np.flipud(cspdata)

    # table of densities vs power law indexes
    fig, csp = plt.subplots()
    heatmap = csp.pcolor(np.abs(1 - 1. / cspdata), cmap=plt.cm.hot_r)
    # gray_r : white is low chisq (good), black is high chisq (bad)
    # hot_r : light is low, dark is high
    # rainbow_r : red is low, blue is high

    csp.set_xticks(np.arange(cspdata.shape[1]) + 0.5, minor=False)
    csp.set_yticks(np.arange(cspdata.shape[0]) + 0.5, minor=False)

    csp.set_xticklabels(denlist, fontweight='bold', minor=False)
    csp.set_yticklabels(powlist, fontweight='bold', minor=False)

    # print chi-squared statistic and best inclination on each block
    for r in range(cspdata.shape[0]):
        for c in range(cspdata.shape[1]):
            csp.text(c + 0.5,
                     r + 0.5,
                     '{0:.6f}'.format(cspdata[r, c]),
                     horizontalalignment='center',
                     verticalalignment='bottom')
            csp.text(c + 0.5,
                     r + 0.5,
                     'Inc = ' + str(bestinc[r, c]),
                     horizontalalignment='center',
                     verticalalignment='top')

    plt.title('Chi-Square Plot - H-band Interferometry',
              fontsize=16,
              fontweight='bold')
    plt.xlabel('Initial Density (rho_0)', fontsize=12, fontweight='bold')
    plt.ylabel('Power Law Index', fontsize=12, fontweight='bold')
    plt.show()
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--max_epochs', default=20, type=int, help='no. of epochs')
    parser.add_argument('--batch_size', default=64, type=int, help='batch size')
    parser.add_argument('--patience', default=5, type=int)
    parser.add_argument('--cos_sim_dim', default=20, type=int)
    parser.add_argument('--lr', default=0.005, type=float)
    parser.add_argument('--debug', action='store_true')
    parser.add_argument('--is_pretrained', action='store_true')
    parser.add_argument('--embed_dim', default=300,
                        type=int, help='embedding dimension')
    args = parser.parse_args()
    print(args)
    # load_and_split_data(file_path='./data/train.csv')
    train_data, val_data, test_data = extract_data(file_path='./data/train.csv')

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    best_val_loss = float("inf")
    best_val_acc = 0.0
    epochs = args.max_epochs  # The number of epochs
    best_model = None
    best_epoch = 0

    train_set = QQPDataset(train_data, split='train', vocab=None,
                           word2idx=None, pre_process=None, device=device, debug=args.debug)
    val_set = QQPDataset(val_data, split='val', vocab=train_set.vocab,
                         word2idx=train_set.word2idx, pre_process=None, device=device, debug=args.debug)

    test_set = QQPDataset(test_data, split='test', vocab=train_set.vocab,
                          word2idx=train_set.word2idx, pre_process=None, device=device, debug=args.debug)

    # use only first time for loading GloVe
    # parse_n_store_glove(file_path='./data')

    train_dataloader = DataLoader(train_set, batch_size=args.batch_size, shuffle=True)
    val_dataloader = DataLoader(val_set, batch_size=args.batch_size, shuffle=False)
    test_dataloader = DataLoader(test_set, batch_size=args.batch_size, shuffle=False)

    vocab_size = len(train_set.vocab)
    K = args.cos_sim_dim

    if args.is_pretrained:
        # use only once
        word2GloVe = create_word2GloVe_dict(file_path='./data')

        word2GloVe = pickle.load(open(f'./data/word2GloVe_dict.pkl', 'rb'))
        # generate GloVe embeddings for words in vocabulary
        vocab_embeddings = get_glove_embeddings(train_set.vocab, word2GloVe)
        vocab_embeddings = torch.from_numpy(vocab_embeddings).to(device)

        model = BiMPM(vocab_size, embed_dim=None,
                      weight_matrix=vocab_embeddings, hidden_size=100, K=K)
    else:
        embed_dim = args.embed_dim
        model = BiMPM(vocab_size, embed_dim, weight_matrix=None, hidden_size=100, K=K)

    model.to(device)

    lr = args.lr  # learning rate
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    # scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'max', factor=0.1,
    # patience=3, verbose=True)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=4, gamma=0.1)

    # for i_batch, sample_batched in enumerate(dataloader):
    train_accs = []
    val_accs = []
    train_losses = []
    val_losses = []
    best_model = None
    k = 0
    for epoch in range(1, epochs + 1):
        epoch_start_time = time.time()
        train_loss, train_acc = train(epoch, train_dataloader, model, optimizer)
        train_accs.append(train_acc)
        train_losses.append(train_loss)
        val_loss, val_acc, y_true, y_pred = evaluate(model, val_dataloader)
        val_accs.append(val_acc)
        val_losses.append(val_loss)
        print('-' * 89)
        print('| end of epoch {:3d} | time: {:5.2f}s | valid loss {} | '.format(epoch,
                                                                                (time.time(
                                                                                ) - epoch_start_time),
                                                                                val_loss))
        print('|Epoch {:3d} | valid accuracy {}'.format(epoch, val_acc))
        print('-' * 89)

        scheduler.step()

        if val_acc > best_val_acc:
            best_val_loss = val_loss
            best_val_acc = val_acc
            best_model = model
            best_epoch = epoch
            torch.save(model.state_dict(), "./best_model_lstm.pt")
            pickle.dump(y_true, open(f'best_val_true_labels.pkl', 'wb'))
            pickle.dump(y_pred, open(f'best_val_pred_labels.pkl', 'wb'))

            k = 0
        elif k < args.patience:
            k += 1
        else:
            break
        # scheduler.step()
    print('Best val loss: {} | acc: {} at epoch {}'.format(
        best_val_loss, best_val_acc, best_epoch))
    test_loss, test_acc, y_true, y_pred = evaluate(best_model, test_dataloader)
    print('Test | loss: {} | acc: {}'.format(
        test_loss, test_acc))
    pickle.dump(y_true, open(f'test_true_labels.pkl', 'wb'))
    pickle.dump(y_pred, open(f'test_pred_labels.pkl', 'wb'))

    log_results = {'train_acc': train_accs,
                   'train_loss': train_losses,
                   'val_acc': val_accs,
                   'val_loss': val_losses,
                   'best_loss': best_val_loss,
                   'best_acc': best_val_acc,
                   'test_loss': test_loss,
                   'test_acc': test_acc
                   }
    pickle.dump(log_results, open(f'log_results.pkl', 'wb'))
def run_statistics(file_name, graph_name, parameter, axis):
    # excel initialize
    e = excel_tools()
    file_dir = 'C:\\Users\\Taire\\Desktop\\TylerPapers\\data_workspace\\{}_statistics.xls'.format(
        file_name)

    # mean and std vectors for each group and run
    exclusion_parameters = {}

    # wide group normal
    inclusion_parameters = {
        'group': 'wide',
        'gait': 'normal',
        'axis': axis,
        'parameter': parameter
    }
    extracted, idx = extract_data(data, inclusion_parameters,
                                  exclusion_parameters)
    constant_list, wide_normal = part_compute(extracted, 'name', np.mean)
    _, wide_normal_std = part_compute(extracted, 'name', np.std, ddof=1)

    # write excel
    e.write_column(e.mean_std, 'participant', constant_list)
    e.write_column(e.mean_std, 'wide_normal_means', wide_normal)
    e.write_column(e.mean_std, 'wide_normal_std', wide_normal_std)

    # wide group toe-in
    inclusion_parameters = {
        'group': 'wide',
        'gait': 'toe-in',
        'axis': axis,
        'parameter': parameter
    }
    extracted, idx = extract_data(data, inclusion_parameters,
                                  exclusion_parameters)
    _, wide_in = part_compute(extracted, 'name', np.mean)
    _, wide_in_std = part_compute(extracted, 'name', np.std, ddof=1)

    # write excel
    e.write_column(e.mean_std, '', [''])
    e.write_column(e.mean_std, 'wide_in_means', wide_in)
    e.write_column(e.mean_std, 'wide_in_std', wide_in_std)

    # narrow group normal
    inclusion_parameters = {
        'group': 'narrow',
        'gait': 'normal',
        'axis': axis,
        'parameter': parameter
    }
    extracted, idx = extract_data(data, inclusion_parameters,
                                  exclusion_parameters)
    constant_list, narrow_normal = part_compute(extracted, 'name', np.mean)
    _, narrow_normal_std = part_compute(extracted, 'name', np.std, ddof=1)

    # write excel
    e.write_column(e.mean_std, '', [''])
    e.write_column(e.mean_std, 'participant', constant_list)
    e.write_column(e.mean_std, 'narrow_normal_means', narrow_normal)
    e.write_column(e.mean_std, 'narrow_normal_std', narrow_normal_std)

    # narrow group toe-out
    inclusion_parameters = {
        'group': 'narrow',
        'gait': 'toe-out',
        'axis': axis,
        'parameter': parameter
    }
    extracted, idx = extract_data(data, inclusion_parameters,
                                  exclusion_parameters)
    _, narrow_out = part_compute(extracted, 'name', np.mean)
    _, narrow_out_std = part_compute(extracted, 'name', np.std, ddof=1)

    # write excel
    e.write_column(e.mean_std, '', [''])
    e.write_column(e.mean_std, 'narrow_out_means', narrow_out)
    e.write_column(e.mean_std, 'narrow_out_std', narrow_out_std)

    # determine amount of rounding
    if parameter in ['KNEE_IMPULSE', 'KNEE_MIN_Y']:
        round_dec = 4
    else:
        round_dec = 1

    # statistical analysis wide comparison
    _, wide_p = ttest_rel(wide_normal, wide_in)
    wide_d = cohens_d.within_group(wide_normal, wide_in)

    wide_normal_mean = str(np.round(np.mean(wide_normal), decimals=round_dec))
    wide_normal_std = str(
        np.round(np.std(wide_normal, ddof=1), decimals=round_dec))
    wide_in_mean = str(np.round(np.mean(wide_in), decimals=round_dec))
    wide_in_std = str(np.round(np.std(wide_in, ddof=1), decimals=round_dec))

    wide_stat = [
        wide_normal_mean + ' ± ' + wide_normal_std,
        wide_in_mean + ' ± ' + wide_in_std,
        str(np.round(wide_p, decimals=3)),
        str(np.round(wide_d, decimals=3))
    ]

    # write excel
    e.write_row(e.statistics, '', ['habitual', 'toe-in', 'P', 'd'])
    e.write_row(e.statistics, 'wide_stat', wide_stat)
    e.write_row(e.statistics, '', [''])

    # graph data
    graph_dir = 'C:\\Users\\Taire\\Desktop\\TylerPapers\\data_workspace\\{}'.format(
        file_name + '_wide')
    title = 'P = {}, d = {}'.format(wide_stat[-2], wide_stat[-1])
    plot_comparisons(title,
                     graph_name,
                     'Habitual \n FPA',
                     'Toe-in \n FPA',
                     np.mean(wide_normal),
                     np.std(wide_normal, ddof=1),
                     np.mean(wide_in),
                     np.std(wide_in, ddof=1),
                     graph_dir,
                     file_type='png')

    # statistical analysis narrow comparison
    _, narrow_p = ttest_rel(narrow_normal, narrow_out)
    narrow_d = cohens_d.within_group(narrow_normal, narrow_out)

    narrow_normal_mean = str(
        np.round(np.mean(narrow_normal), decimals=round_dec))
    narrow_normal_std = str(
        np.round(np.std(narrow_normal, ddof=1), decimals=round_dec))
    narrow_out_mean = str(np.round(np.mean(narrow_out), decimals=round_dec))
    narrow_out_std = str(
        np.round(np.std(narrow_out, ddof=1), decimals=round_dec))

    narrow_stat = [
        narrow_normal_mean + ' ± ' + narrow_normal_std,
        narrow_out_mean + ' ± ' + narrow_out_std,
        str(np.round(narrow_p, decimals=3)),
        str(np.round(narrow_d, decimals=3))
    ]

    # write excel
    e.write_row(e.statistics, '', ['habitual', 'toe-out', 'P', 'd'])
    e.write_row(e.statistics, 'narrow_stat', narrow_stat)
    e.write_row(e.statistics, '', [''])

    # graph data
    graph_dir = 'C:\\Users\\Taire\\Desktop\\TylerPapers\\data_workspace\\{}'.format(
        file_name + '_narrow')
    title = 'P = {}, d = {}'.format(narrow_stat[-2], narrow_stat[-1])
    plot_comparisons(title,
                     graph_name,
                     'Habitual \n FPA',
                     'Toe-out \n FPA',
                     np.mean(narrow_normal),
                     np.std(narrow_normal, ddof=1),
                     np.mean(narrow_out),
                     np.std(narrow_out, ddof=1),
                     graph_dir,
                     file_type='png')

    # statistical analysis habitual comparison
    _, habitual_p = ttest_ind(wide_normal, narrow_normal)
    habitual_d = cohens_d.between_group(wide_normal, narrow_normal)

    narrow_normal_mean = str(
        np.round(np.mean(narrow_normal), decimals=round_dec))
    narrow_normal_std = str(
        np.round(np.std(narrow_normal, ddof=1), decimals=round_dec))
    wide_normal_mean = str(np.round(np.mean(wide_normal), decimals=round_dec))
    wide_normal_std = str(
        np.round(np.std(wide_normal, ddof=1), decimals=round_dec))

    habitual_stat = [
        narrow_normal_mean + ' ± ' + narrow_normal_std,
        wide_normal_mean + ' ± ' + wide_normal_std,
        str(np.round(habitual_p, decimals=3)),
        str(np.round(habitual_d, decimals=3))
    ]
    # write excel
    e.write_row(e.statistics, '', ['narrow', 'wide', 'P', 'd'])
    e.write_row(e.statistics, 'habitual_stat', habitual_stat)
    e.write_row(e.statistics, '', [''])

    # graph data
    graph_dir = 'C:\\Users\\Taire\\Desktop\\TylerPapers\\data_workspace\\{}'.format(
        file_name + '_inter')
    title = 'P = {}, d = {}'.format(habitual_stat[-2], habitual_stat[-1])
    plot_comparisons(title,
                     graph_name,
                     'Narrow \n FPA group',
                     'Wide \n FPA group',
                     np.mean(narrow_normal),
                     np.std(narrow_normal, ddof=1),
                     np.mean(wide_normal),
                     np.std(wide_normal, ddof=1),
                     graph_dir,
                     file_type='png')

    # save results
    e.workbook.save(file_dir)
Example #23
    print('Completed in {0:.2f} seconds'.format(end - start))


if __name__ == "__main__":
    all_analytics = []

    with open("output.csv", "w") as file:
        file.write(",".join([
            "dataset", "kernel", "normalization", "accuracy", "precision",
            "recall", "fscore"
        ]) + "\n")
        for dataset in DATASETS:
            print("Preprocessing data from {0}".format(dataset))
            start = time.time()
            # extract real data from csv
            graphs = ed.extract_data(dataset)
            # generate fake data
            fake_graphs = ed.shuffle_graphs(graphs, classification_difficulty)
            # combining & shuffling real and fake data
            complete_data = ed.combine(graphs, fake_graphs)
            # splitting training & testing sets
            bound = int(len(complete_data) * training_ratio)
            training_set = complete_data[:bound]
            testing_set = complete_data[bound:]
            end = time.time()
            print("Preprocessing complete in {0:.2f} seconds".format(end -
                                                                     start))
            for kernel in KERNELS:
                for n in NORMALIZATIONS:
                    da = DatasetAnalytics(dataset, kernel, n, training_set,
                                          testing_set)
Example #24
import json
import datetime
import time

from phrasing_url import phrasing_url
from extract_data import extract_data

initial_url = "https://www.zhipin.com/c101280600/?query=%E6%95%B0%E6%8D%AE%E5%88%86%E6%9E%90&page="

dic = []
i = 1
while i < 11:
    print("\n\n" + "parsing page: " + str(i))
    url = initial_url + str(i) + '&ka=page-' + str(i)
    print(url)
    html = phrasing_url(url)
    dic.append(extract_data(html))
    print("Time now: " + str(datetime.datetime.now()))
    time.sleep(60)
    i = i + 1

with open('bosszhipin2.txt', 'w', encoding='utf-8') as f:
    f.write(json.dumps(dic, ensure_ascii=False))
"""
Created on Fri Dec 20 15:32:21 2019

This is the master script that runs the following processes

#1 Reading the observed surge time series for 902 tide gauges

#2 Reading netcdf files to extract predictors for each tide gauge

#3 Concatenating predictors with corresponding surge time series 
   for each tide gauge and save as a dataframe
   
#4 Saving the dataframe as a .csv in the ../pred_and_surge

*delta: distance (in degrees) from the tide gauge - this will form 
        a grid of delta x delta box around each tide gauge
@author: Master Script
"""

import os 
os.chdir("E:\data\scripts\modeling_storm_surge")
from extract_data import extract_data
         
delta = 5
extract_data(delta)

         



Example #26
#Creating endpoints
@app.route("/contacts")
def get_contacts():
	"""
	This functions returns response in JSON format for the given endpoint.
	
	args: None
		Variables used: name, revenue_gte,company_id 
	return:  response <Json>
	"""
    name="{}".format(request.args.get('name','none'))
    revenue_gte="{}".format(request.args.get('revenue_gte', 'none'))
    company_id="{}".format(request.args.get('company_id', 'none'))
	# Filter based on name
    if name != 'none':
        data = json.dumps(extract_data(specific_contact_name=name), indent=2)
	# Filter based on greater than or equal to revenue given
    elif revenue_gte != 'none':
        data = json.dumps(extract_data(specific_revenue=revenue_gte), indent=2)
	# Filter based on company id
    elif company_id != 'none':
        data = json.dumps(extract_data(specific_company_id=company_id), indent=2)
	# Else fetch all contacts
    else:
        data = json.dumps(extract_data(), indent=2)

    response = app.response_class(
        response=data,
        status=200,
        mimetype='application/json'
    )
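A hedged smoke test for the /contacts endpoint using Flask's test client, assuming app and extract_data are defined as in this example; the query values are placeholders.

with app.test_client() as client:
    all_contacts = client.get("/contacts")
    by_name = client.get("/contacts", query_string={"name": "Jane Doe"})
    by_revenue = client.get("/contacts", query_string={"revenue_gte": "100000"})
    print(all_contacts.status_code, by_name.status_code, by_revenue.status_code)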
Example #27
'''
1. The image URLs to fetch are for Project and Unitia.
   Unitia images start with https://ba.hitomi.la/galleries/1294943/
   followed by the image name.
   Project images start with https://ba.hitomi.la/galleries/1286145/

2. The image names are kept in a list.
'''

from extract_data import extract_data
from download import download

unitia = "https://hitomi.la/reader/1294943.html"

project = "https://hitomi.la/reader/1286145.html"

unitia_img = extract_data(unitia)
unitia_id = unitia.split('reader' + '/')[1].split('.html')[0]
print(unitia_id)

download(unitia_img, unitia_id)
Example #28
        help="regularization coefficient",
        required=True,
    )

    args = parser.parse_args()

    commondir = args.scriptdir

    sys.path.append(commondir)
    from utils import load_data
    from extract_data import extract_data

    # load data into Pandas dataframe
    data_dir = args.datadir
    if not os.path.exists(os.path.join(data_dir, "energy.csv")):
        extract_data(data_dir)

    energy = load_data(data_dir)

    # parse values of hyperparameters
    T = int(args.T)
    LATENT_DIM_1 = int(args.LATENT_DIM_1)
    LATENT_DIM_2 = int(args.LATENT_DIM_2)
    BATCH_SIZE = int(args.BATCH_SIZE)
    LEARNING_RATE = float(args.LEARNING_RATE)
    ALPHA = float(args.ALPHA)

    # train and evaluate the RNN multi-step network with the given hyperparameter values
    run_training(energy, T, LATENT_DIM_1, LATENT_DIM_2, BATCH_SIZE,
                 LEARNING_RATE, ALPHA)
Example #29
def generate_statistics(path):
    all_wav_files_ordered = []
    wav_file_sizes = []
    text_transcriptions = []
    speakers = []
    sexes = []
    births = []
    youths = []

    cnt = 0
    counter_short_long = 0
    for path, subdirs, files in os.walk(path):
        for name in files:
            if not name.endswith('spl'):
                continue

            spl_path = os.path.join(path, name)
            speaker, name, sex, region_of_birth, region_of_youth, wavs_and_transcriptions = extract_data(spl_path)



            wav_path = spl_path.replace('.spl', '/').replace('data', 'speech')

            for key in wavs_and_transcriptions:

                final_path = f'{wav_path}{key}'

                if not os.path.isfile(final_path):
                    continue

                f = sf.SoundFile(final_path)
                duration = len(f) / f.samplerate
                #

                # if duration < 2:
                #     counter_short_long += 1
                #     continue

                cnt += 1
                if cnt % 1000 == 0:
                    print(cnt)

                all_wav_files_ordered.append(final_path)
                wav_file_sizes.append(os.path.getsize(final_path))

                speakers.append(speaker)

                lower_text = wavs_and_transcriptions[key].lower() \
                    .replace(',', ' ').replace('è', "e").replace('é', "e").replace("ÿ", "y").replace("ü", "u")

                for c in string.punctuation:
                    lower_text = lower_text.replace(c, " ")

                text_transcriptions.append(lower_text)
                sexes.append(sex)
                births.append(region_of_birth)
                youths.append(region_of_youth)

    group_sexes = Counter(sexes)
    group_births = Counter(births)
    group_youths = Counter(youths)
    group_speakers = Counter(speakers)

    print('=========SEXES==========')
    print(group_sexes.values())
    print(group_sexes.keys())
    print('===================')

    print('=========births==========')
    print(group_births.values())
    print(group_births.keys())
    print('===================')

    print('=========youths==========')
    print(group_youths.values())
    print(group_youths.keys())
    print('===================')

    print('=========speakers==========')
    print(len(group_speakers.values()))
    print(group_speakers.values())
    print(group_speakers.keys())
    print('===================')

    print(counter_short_long)
Example #30
import numpy as np
from extract_data import extract_data


def floyd(dis_map):
    vertices_num = len(dis_map)
    for k in range(1, vertices_num):
        for i in range(1, vertices_num):
            for j in range(1, vertices_num):
                temp = dis_map[i][k] + dis_map[k][j]
                if dis_map[i][j] > temp:
                    dis_map[i][j] = temp
    for i in range(vertices_num):
        dis_map[i][i] = 0
    return dis_map


def get_information(filename):
    info, req_edges, dis_map = extract_data(filename)
    dis_map = floyd(dis_map)
    return info, req_edges, dis_map


if __name__ == '__main__':
    filename = 'C:\FILES and WORKS\学习\大三上\AI\AILAB\CARP\CARPResource\Proj2_Carp\CARP_samples\gdb1.dat'
    info, req_edges, dis_map = extract_data(filename)
    # print(dis_map)
    # print()
    dis_map = floyd(dis_map)
    print(dis_map)
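Note that floyd() above starts its k/i/j loops at index 1, which assumes row and column 0 of dis_map are padding. For comparison, a generic Floyd-Warshall sketch that relaxes over every vertex index; it does not depend on extract_data.

def floyd_all(dis_map):
    # standard Floyd-Warshall relaxation over all vertex indices
    n = len(dis_map)
    for k in range(n):
        for i in range(n):
            for j in range(n):
                through_k = dis_map[i][k] + dis_map[k][j]
                if through_k < dis_map[i][j]:
                    dis_map[i][j] = through_k
    for i in range(n):
        dis_map[i][i] = 0
    return dis_map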
Example #31
def generate_statistics_from_file(path, wavs):
    wav_paths_read = []
    speakers = []
    sexes = []
    births = []
    youths = []

    cnt = 0
    for path, subdirs, files in os.walk(path):
        for name in files:
            if not name.endswith('spl'):
                continue

            spl_path = os.path.join(path, name)
            speaker, name, sex, region_of_birth, region_of_youth, wavs_and_transcriptions = extract_data(spl_path)

            wav_path = spl_path.replace('.spl', '/').replace('data', 'speech')

            for key in wavs_and_transcriptions:

                final_path = f'{wav_path}{key}'

                # if final_path not in wavs:
                #     continue

                cnt += 1
                if cnt % 100000 == 0:
                    print(cnt)

                speakers.append(speaker)
                wav_paths_read.append(final_path)
                sexes.append(sex)
                births.append(region_of_birth)
                youths.append(region_of_youth)


    indices = []
    for path in wavs:
        indices.append(wav_paths_read.index(path))
    # indices = int(indices)

    group_sexes = Counter([sexes[i] for i in indices])
    group_births = Counter([births[i] for i in indices])
    group_youths = Counter([youths[i] for i in indices])
    group_speakers = Counter([speakers[i] for i in indices])

    print('=========SEXES==========')
    print(group_sexes.values())
    print(group_sexes.keys())
    print('===================')

    print('=========births==========')
    print(group_births.values())
    print(group_births.keys())
    print('===================')

    print('=========youths==========')
    print(group_youths.values())
    print(group_youths.keys())
    print('===================')

    print('=========speakers==========')
    print(group_speakers.values())
    print(group_speakers.keys())
    print('===================')
Example #32
def get_information(filename):
    info, req_edges, dis_map = extract_data(filename)
    dis_map = floyd(dis_map)
    return info, req_edges, dis_map
Example #33
def run_interrupt(ifile):
    """
    run all sci run plot routines
    input:  ifile                --- input file name. if it is not given, the script will ask
    output: <plot_dir>/*.png    --- ace data plot
            <ephin_dir>/*.png   --- ephin data plot
            <goes_dir>/*.png    --- goes data plot
            <xmm_dir>/*.png     --- xmm data plot
            <html_dir>/*.html   --- html page for that interruption
            <web_dir>/rad_interrupt.html    --- main page
    """
    #
    #--- check input file exist, if not ask
    #
    test = exc_dir + ifile
    if not os.path.isfile(test):
        ifile = input('Please provide the interrupt timing list: ')
#
#--- extract data
#
    print("Extracting Data")
    edata.extract_data(ifile)

    data = mcf.read_data_file(ifile)

    for ent in data:
        atemp = re.split('\s+|\t+', ent)
        event = atemp[0]
        start = atemp[1]
        stop = atemp[2]
        gap = atemp[3]
        itype = atemp[4]

        print("PLOTTING: " + str(event))
        #
        #--- plot Ephin data
        #
        print("EPHIN/HRC")
        ephin.plot_ephin_main(event, start, stop)
        #
        #---- plot GOES data
        #
        print("GOES")
        goes.plot_goes_main(event, start, stop)
        #
        #---- plot other radiation data (from NOAA)
        #
        print("NOAA")
        ace.start_ace_plot(event, start, stop)
        #
        #---- extract and plot XMM data
        #
        print("XMM")
        xmm.read_xmm_and_process(event)

#
#---- create individual html page
#
    print("HTML UPDATE")
    srphtml.print_each_html_control(ifile)
    #
    #---- update main html page
    #
    srphtml.print_each_html_control()