Example #1
def extract_and_run():
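    """Extract the inputs, then drive a smif model run.

    Either runs the whole model run in one go or, when it is part of a
    system-of-systems run, steps a single sector model through its timesteps.
    Helpers such as extract(), run_process() and is_truthy() and the *_PATH /
    flag globals are defined elsewhere in this module.
    """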
    extract()

    model_run_file = NISMOD_PATH.joinpath("config/model_runs/",
                                          model_to_run + ".yml")
    if is_truthy(use_generated_scenario):
        generated_scenario = INPUT_PATH.joinpath(model_to_run + ".yml")
        generated_scenario.parent.mkdir(parents=True, exist_ok=True)
        os.link(generated_scenario, model_run_file)

    go_to_nismod_root = "cd " + str(NISMOD_PATH)

    if not is_truthy(part_of_sos_model):
        run_process(go_to_nismod_root + " && smif list")
        run_process(go_to_nismod_root + " && smif run " + model_to_run)
    else:
        with open(model_run_file, "r") as f:
            model_run = yaml.safe_load(f.read())
        print("Beginning run of   -   ", model_to_run)
        run_process(go_to_nismod_root + " && smif list")
        # run_process(go_to_nismod_root + " && smif decide " + model_to_run)
        # print("Decide step finished")
        run_process(go_to_nismod_root + " && smif before_step " +
                    model_to_run + " --model " + sector_model)
        print("Before step finished")

        if timestep == "":
            for t in model_run["timesteps"]:
                run_for_timestep = (go_to_nismod_root + " && smif step " +
                                    model_to_run + " --model " + sector_model +
                                    " --timestep " + str(t) + " --decision 0")
                print("running - ", run_for_timestep)
                run_process(run_for_timestep)
                print("Run for timestep" + str(t))
        else:
            result_inputs_dir = INPUT_PATH.joinpath(model_to_run)
            additional_inputs_dir = INPUT_PATH.joinpath("additional/")
            if result_inputs_dir.exists():
                print("Copying results from previous step to results folder")
                RESULTS_PATH.parent.mkdir(parents=True, exist_ok=True)
                run_process("cp -ru " + str(result_inputs_dir) + "/ " +
                            str(RESULTS_PATH))
                print("Copied proper results")

            if additional_inputs_dir.exists():
                print("Copying results from transport step to results folder")
                TRANSPORT_ADDITIONAL_OUTPUTS_PATH.mkdir(parents=True,
                                                        exist_ok=True)
                run_process("cp -ru " + str(additional_inputs_dir) + "/* " +
                            str(TRANSPORT_ADDITIONAL_OUTPUTS_PATH))
                print("Copied transport results")

            run_for_timestep = (go_to_nismod_root + " && smif step " +
                                model_to_run + " --model " + sector_model +
                                " --timestep " + timestep + " --decision 0")
            print("running - ", run_for_timestep)
            run_process(run_for_timestep)
            print("Run for timestep" + timestep)
Example #2
def batch(n1, n2):
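    # extract LOG<number>.TXT files numbered n1 through n2, falling back to a
    # lowercase .txt extension when the uppercase file does not exist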
    now = n1
    prefix = "LOG"
    while now <= n2:
        fn = prefix + padding(now) + ".TXT"
        if not os.path.exists(fn):
            fn = prefix + padding(now) + ".txt"
        extract(fn)
        now += 1
Example #3
def main(_):
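    """Train a small convnet on 32x32 colour images loaded from .mat files
    (the filenames match the SVHN dataset) and print train and test accuracy.
    deepnn() and extract() are defined elsewhere in this module.
    """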
    # Import data
    # mnist = input_data.read_data_sets(FLAGS.data_dir, one_hot=True)
    im, label = extract('./', 'train_32x32.mat')
    im2, label2 = extract('./', 'test_32x32.mat')
    # Create the model
    x = tf.placeholder(tf.float32, [None, 32, 32, 3])

    # Define loss and optimizer
    y_ = tf.placeholder(tf.float32, [None, 10])

    # Build the graph for the deep net
    y_conv, keep_prob = deepnn(x)

    with tf.name_scope('loss'):
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=y_,
                                                                logits=y_conv)
    cross_entropy = tf.reduce_mean(cross_entropy)

    with tf.name_scope('adam_optimizer'):
        train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)

    with tf.name_scope('accuracy'):
        correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
        correct_prediction = tf.cast(correct_prediction, tf.float32)
    accuracy = tf.reduce_mean(correct_prediction)

    graph_location = tempfile.mkdtemp()
    print('Saving graph to: %s' % graph_location)
    train_writer = tf.summary.FileWriter(graph_location)
    train_writer.add_graph(tf.get_default_graph())

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for i in range(200):
            # batch = mnist.train.next_batch(50)
            if i % 2 == 0:
                train_accuracy = accuracy.eval(feed_dict={
                    x: im,
                    y_: label,
                    keep_prob: 1.0
                })
                print('step %d, training accuracy %g' % (i, train_accuracy))
            train_step.run(feed_dict={x: im, y_: label, keep_prob: 0.5})

        print('test accuracy %g' % accuracy.eval(feed_dict={
            x: im2,
            y_: label2,
            keep_prob: 1.0
        }))
Example #4
def getData():
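    # append the newly extracted rows to allData.csv and log this iteration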
    global iteration

    a = extract()
    with open('allData.csv', 'a') as allData:
        # write the header only on the first append, when the file is empty
        a.to_csv(allData, header=allData.tell() == 0, index=False)
    print(iteration, datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
    iteration = iteration + 1
Example #5
def features(polarity, skew=False):
    # build the feature matrix: n-gram counts plus LIWC features
    if not skew:
        corpus, y = extract(polarity)
    else:
        corpus, y = extract_skew(polarity)
    X_ngram, r_order = bigram_plus(corpus)
    LIWC = add_LIWC(polarity, r_order, y)
    data = np.hstack((X_ngram, np.matrix(LIWC)[:, 1:]))
    y = np.squeeze(np.asarray(data[:, -2])).astype(int)
    return data, y
Example #6
def test_transform():
    setUp()

    try:
        extract(out_file)
        with open(out_file) as f:
            new_lines = f.readlines()
        # compare each output line against the expected line at the same index
        for i, line in enumerate(new_lines):
            line = line.rstrip("\n")
            expected = expectedLines[i].split(seperator)
            actual = line.split(seperator)
            assert actual[0] == expected[0]
            assert float(actual[1]) == float(expected[1])
            assert datetime.datetime.strptime(actual[2], "%Y-%m-%dT%H:%M:%S")
    finally:
        tearDown()
Example #7
def features(polarity, version="ott", skew=False, ngrams=(1, 2), save=False):
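    # assemble the feature matrix for one polarity: n-gram counts plus LIWC
    # features and, for non-'ott' versions, Watson features, merged with the
    # y labels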
    if not skew:
        corpus, y = extract(polarity)
    else:
        corpus, y = extract_skew(polarity)
    X_ngram, r_order = bigram_plus(corpus, ngrams=ngrams, save=save, polarity=polarity)
    # X_tfidf = tf_idf(X_ngram)
    feats = add_LIWC(polarity, r_order)
    if version != "ott":
        feats = add_watson(polarity, feats)
    output = add_y(y)
    test = np.hstack((np.matrix(feats), X_ngram))
    X = pd.DataFrame(X_ngram)
    feat_data = feats.merge(X, left_index=True, right_index=True)
    # sanity check that the merged frame matches the stacked matrix
    if np.matrix(feat_data).all() != test.all():
        print("data does not line up")
        print(np.shape(feat_data))
        print(np.shape(test))
        return
    data = feat_data.merge(output, "left")
    if len(X) != len(data):
        return
    y = np.squeeze(np.asarray(data[["y_rating", "fold"]])).astype(int)
    return data, y
Example #9
if __name__ == "__main__":
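    # build the positive-review corpus and run LSI topic analysis on it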
    corpus, y = extract("positive")
    LSI(corpus)
    # data, y = features('positive',version='watson')
    # data, y = features('negative',version='watson')
    # corpus = ['I stay at this hotel 2 times a year on business and LOVE it! The staff are great, the rooms are spacious and clean, and the location is perfect for shopping and dining on Michigan Ave. Plus if you sign up for Omni Select Membership (for free) you get free wireless internet access. ', 'This a great property, excellent location and wonderful staff. Everyone was very accommodating and polite. The room I had was on the 23rd floor and was like a suite, with a living area and a bedroom. The living room was spacious, with a plasma TV, a desk and a couch. The beds were very comfortable and the toiletries of very good quality. In the closed they placed an umbrella, which came in handy, it rained the whole time I was in Chicago. The internet connection is $9.95/24hrs. Great place, I will return for sure. ', 'This hotel is the perfect location for downtown Chicago shopping. The only thing is the pool is extremely small - it is indoors, but looks much larger on the website. ', 'The Omni is in a fabulous location on Michigan Avenue. Within just blocks are all types of stores, including Saks, Nordstroms, H&M, Filenes Basement, Macys, La Perla, Apple, Bloomingdates.....I could go on and on! The room itself was fabulous. Comfortable, nice big flat screen tvs, nice sized bathroom. They charge for Wi-Fi, but we found if we clicked yes on joining their guest program we could then go from the sign-on screen right to our email without actually completing the registration. We got this hotel for $214/night through Priceline and felt it was a terrific deal! ', 'The Omni Chicago really delivers on all fronts, from the spaciousness of the rooms to the helpful staff to the prized location on Michigan Avenue. While this address in Chicago requires a high level of quality, the Omni delivers. Check in for myself and a whole group of people with me was under 3 minutes, the staff had plentiful recommendations for dining and events, and the rooms are some of the largest you\'ll find at this price range in Chicago. Even the "standard" room has a separate living area and work desk. The fitness center has free weights, weight machines, and two rows of cardio equipment. I shared the room with 7 others and did not feel cramped in any way! All in all, a great property! ']
    # X_y = bigram_plus(corpus)
    # print(np.shape(X_y))
Example #11
    elif op == '-s' or op == '--SingleEnd':
        readType = 'Single'
    elif op == '-c' or op == '--Condition':
        condition = int(value)
    elif op == '-r' or op == '--Replicate':
        replicate = int(value)
    elif op == '-g' or op == '--GeneFile':
        geneFile = value
    elif op == '-b' or op == '--Bound':
        cutoff = float(value)
    elif op == '-h' or op == '--Help':
        usage()
        sys.exit()
# check the cmd

extract_data.extract(input_path)

data_exist = ref_exist and input_exist and out_exist and ref_type_exist
if not data_exist:
    print('Incomplete input parameters!!!')
    usage()
    sys.exit()
if condition * replicate != len(input_path):
    print('The input file num is wrong!')
    usage()
    sys.exit()

start_time = time.strftime('%Y-%m-%d-%H:%M:%S', time.localtime(time.time()))

if DEBUG:
    pdb.set_trace()
Example #12
import os
import codecs
import numpy as np
import pandas as pd
import nltk
from extract_data import extract
from feature_extract import bigram_plus, tf_idf


if __name__ == "__main__":
    pos_corpus, pos_test = extract('positive')
    neg_corpus, neg_test = extract('negative')
    X_y = bigram_plus(pos_corpus)
    print(np.shape(X_y), np.shape(pos_test))

Example #13
    print("\n" + 100 * "*" + "\n" + num * "-" + " " + msg + " " + num * "-" +
          "\n" + 100 * "*" + "\n")


"""
Main class of the application
Input  : input script parameters
"""
if __name__ == "__main__":
    parameter = cli_params(sys.argv[1:])
    # full logging if verbose mode is on
    if parameter.verbose:
        logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s',
                            level=logging.INFO)

    # Step 1: Web scraping
    print_step("Step 1: Scraping pubmed database")
    pmids, results = extract_data.extract(parameter)
    print_step("End step 1")

    # Step 2: Text processing
    print_step("Step 2: Natural language processing")
    articles, dicctionary, corpus = natural_language_processing.process_docs(
        parameter, pmids, results)
    print_step("End step 2")

    # Step 3: Text similarities
    print_step("Step 3: Text similarities")
    text_similarities.similarity(parameter, articles, dicctionary, corpus)
    print_step("End step 3")
Example #14
        '--outDir',
        action='store',
        default='/mnt/c/TEMP/out',
        help='out path for all the captures',
    )
    parser.add_argument(
        '--coords',
        action='store',
        default='coords.json',
        help='path of the file containing the position of the region of interest',
    )
    args = parser.parse_args()

    rois = []
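    # crop the region of interest out of every capture_* image in captureDir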
    for fname in (f for f in os.listdir(path=args.captureDir) if 'capture_' in f):
        basename, ext = fname.rsplit('.', 1)
        out_name = f'{basename}_roi.{ext}'
        out_full_path = os.path.join(args.outDir, out_name)
        extract_roi.extract(
            src=os.path.join(args.captureDir, fname),
            dest=out_full_path,
            coords=args.coords,
        )
        rois.append(os.path.join(args.outDir, out_name))

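    # then extract the data from each cropped region of interest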
    for roi in rois:
        # bp = os.path.dirname(roi)
        extract_data.extract(
            image=roi, sqWidth=5, regionOnGuest='1200x800', outFile=None, verify=True
        )
Example #15
                meanline=True,
                patch_artist=True,
                vert=True,
                labels=labels,
                medianprops=medianprops,
                meanprops=meanprops,
                whiskerprops=whiskerprops,
                capprops=capprops,
                boxprops=boxprops)
    plt.title(title)
    plt.grid(axis="x", ls=":", lw=1, color="gray", alpha=1)
    plt.grid(axis="y", ls=":", lw=1, color="gray", alpha=1)
    plt.savefig(saveto + title + ".jpg")
    plt.show()


if __name__ == '__main__':
    # data directory: oct13-num-850-variancexy500-count48-100
    saveto = "/home/iscas/2020_EXPDATA/"
    datapath1 = saveto + "nov7-num-1100-variancexy500-count64-alititude3000-2group-120s-300/"
    # load the scores
    score1 = extract(3, datapath1, "/summary.csv", "score")
    # current data
    # tag = "500"
    # line plot
    plot_line(score1)
    # box plot of all the data
    plot_box(score1, 0)
    # box plot of the 25%-75% data
    plot_box(score1, 1)
Example #16
def get_data_from_fcs(filename, x_attr, y_attr):
    """Extract data from FCS file"""
    data, channel_names = extract(filename)
    x, y = data[x_attr].values, data[y_attr].values
    return x, y
Example #17
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def subtract(mA, mB):
    # element-wise difference of two equally sized matrices (lists of lists)
    result = [[0 for y in range(len(mA[0]))] for x in range(len(mA))]

    for i in range(len(mA)):
        for j in range(len(mA[i])):
            result[i][j] = mA[i][j] - mB[i][j]
    return result


learning_rate = 0.1
input_data, output_data = extract_data.extract()
input_data = np.delete(input_data, 0, axis=1)  # drop the first column
train_x, test_x, train_y, test_y = train_test_split(
    input_data, output_data, test_size=0.33, random_state=42)
# a 9-4-1 network with sigmoid activations, trained by hand-rolled backprop
w1 = np.random.rand(9, 4)
w2 = np.random.rand(4, 1)
total_loss = []
for i in range(10000):
    # forward pass through both sigmoid layers
    z1 = np.matmul(train_x, w1)
    l1 = sigmoid(z1)
    z2 = np.matmul(l1, w2)
    y_hat = sigmoid(z2)
    # backpropagate the mean-squared-error gradient through both layers
    delta2 = np.multiply(np.multiply(
        np.true_divide(y_hat - train_y, train_y.size / 2), y_hat), 1 - y_hat)
    delta1 = np.multiply(
        np.multiply(np.matmul(delta2, np.transpose(w2)), l1), 1 - l1)
    dw1 = np.matmul(np.transpose(train_x), delta1)
    dw2 = np.matmul(np.transpose(l1), delta2)
    w1 = w1 - learning_rate * dw1
    w2 = w2 - learning_rate * dw2
    z1 = np.matmul(test_x, w1)
Example #18
sell_fees = 0.0015  # stays constant
buy_fees = 0.0025  # stays constant
spread = 0.004  # varies from one crypto to the next. It has to be taken into account because it is large and it applies to both buying and selling. It can range from 0.001 to 0.01.
# the larger it is, the more it hurts, especially since we trade very often. The smaller it is, the more we can do high-frequency trading (e.g. 5 min)
# the larger it is, the less we can do high-frequency trading (e.g. 30 min, 1 hr)

################################################# Run the code ###############################################
sell_fees = 1 / (1 + sell_fees)
buy_fees = 1 / (1 + buy_fees)
spread = 1 / (1 + spread)

df_list = []
altcoins_list = []

# here we pull the prices of the cryptos selected in the "extract_data" script from Poloniex
combined_df = extract_data.extract()

# we select the data from 2017 to today
combined_df = combined_df[:]['2017-03-22 00:00:00':'2018-03-23 00:00:00']

# here we compute the individual return of each crypto under buy and hold. We also compute the total return, stored in "tot_return".
# this figure is used to compare the profit of our strategy against that of a buy-and-hold strategy.
tot_return = compute_individual_currency_profit(combined_df)

# here we prepare the lists used to plot the buy-and-hold strategy and the buy-and-hold + margin strategy.
base_liste, base_liste_2 = make_graph_data(combined_df, temps, montant_initial,
                                           leverage)

plott = []
total_pot = montant_initial
array = combined_df.values