Esempio n. 1
0
def generate_balanced_dataset():
    """Assemble a balanced data/labels dict from the training pickle.

    The training articles are split by label (1 is called "real" and 0
    "fake" in this code), each group is passed through ``balance_dataset``
    and the two results are concatenated, real group first, together with
    a matching list of labels.

    NOTE(review): in the visible code the dev set is loaded but never used
    and the assembled dict is neither returned nor written anywhere --
    confirm whether the function continues beyond this excerpt.
    """
    training_path = '../data_pickle/training.pkl'
    dev_path = '../data_pickle/dev.pkl'
    train = load_data(training_path)
    dev = load_data(dev_path)  # not used below

    articles, labels = train['data'], train['labels']
    real_group = get_articles_from_label(articles, labels, 1)
    fake_group = get_articles_from_label(articles, labels, 0)

    balanced_real = balance_dataset(real_group)
    balanced_fake = balance_dataset(fake_group)

    real_labels = [1] * len(balanced_real)
    fake_labels = [0] * len(balanced_fake)
    bal_data = {
        'data': balanced_real + balanced_fake,
        'labels': real_labels + fake_labels,
    }
def generate_balanced_dataset():
    """Build a label-balanced ``{'data': ..., 'labels': ...}`` dict.

    Articles of the training pickle are grouped by label (1, then 0), each
    group is run through ``balance_dataset``, and the balanced groups are
    joined with the label-1 articles first.

    NOTE(review): the dev pickle is loaded but unused, and ``bal_data`` is
    not returned or saved in the code visible here -- confirm against the
    full source.
    """
    train = load_data('../data_pickle/training.pkl')
    test = load_data('../data_pickle/dev.pkl')  # loaded only; not used below

    label_order = (1, 0)  # 1: "real" articles, 0: "fake" articles
    by_label = {
        label: get_articles_from_label(train['data'], train['labels'], label)
        for label in label_order
    }
    balanced = {label: balance_dataset(by_label[label])
                for label in label_order}

    new_articles = balanced[1] + balanced[0]
    new_labels = [1] * len(balanced[1]) + [0] * len(balanced[0])
    bal_data = {'data': new_articles, 'labels': new_labels}
Esempio n. 3
0
    def make():
        """Validate the GUI input, run the experiment and collect plot specs.

        Parameter names and values are read from ``variables`` and the axis
        selections from ``plots`` (both come from the enclosing scope). After
        validation an ``Experiment`` is built from the parameters, initialized,
        its tree is built, the parameters are dumped to ``parameters.json``
        inside the tree root, and the experiment is run on ``device``. The
        call then blocks until ``device.counter.value`` equals the count
        returned by ``device.run``.

        Returns:
            1 if validation fails (an error dialog has already been shown);
            None otherwise, including when an exception was caught and
            reported through the error dialog.
        """
        # Function-scope imports keep this block self-contained.
        import os
        import time
        import traceback

        try:
            names = [var.get() for var in variables]
            if len(names) != len(set(names)):
                msgbox.showerror('Error', 'choosing the same parameter twice is not allowed\n'
                                          'plese delet one')
                return 1

            # Every plot axis must name either a chosen parameter or a
            # supported fit result; build the lookup once for all axes.
            allowed = set(names)
            allowed.update(SUPPORTED_FIT_RESULTS)
            for plot in plots:
                for axis in plots[plot]:
                    axis_name = axis.get()
                    if axis_name in allowed:
                        continue
                    if axis_name == '':
                        msgbox.showerror('Error', 'enpthy field not allowed in plot')
                    else:
                        msgbox.showerror('Error', 'parameter '+axis_name+' was not set')
                    return 1

            parameters = {}
            for variable in variables:
                # SECURITY: eval() executes whatever text was typed into the
                # field. Kept because accepted inputs may be arbitrary
                # expressions; consider ast.literal_eval if only literals
                # are ever expected.
                parameters[variable.get()] = eval(variables[variable].get())

            experiment1 = Experiment(**parameters)
            print('initializing...')
            experiment1.initialize()
            print('building...')
            tree_root = build_tree(experiment1.Tree)
            # os.path.join instead of a hard-coded backslash so the path is
            # also valid on non-Windows systems.
            with open(os.path.join(tree_root, 'parameters.json'), 'w') as fo:
                json.dump(experiment1.parameters, fo)
            print('build done!')
            count_limit = device.run(experiment1, tree_root, experiment1.parameters['freqency_list'], experiment1.parameters['powers_list'])
            print(count_limit)
            # Wait for the device counter to reach the expected count; the
            # short sleep keeps the wait from spinning a CPU core.
            while device.counter.value != count_limit:
                time.sleep(0.01)
            print('\ndone!')
            # NOTE(review): loads from './tree' rather than tree_root --
            # confirm these refer to the same directory.
            data = experiment.load_data('./tree')
            plot_list = []
            for plot in plots:
                axes = plots[plot]
                if len(axes) in (2, 3):
                    plot_list.append(([axis.get() for axis in axes],))
            # report_generator.make_report(data, plot_list)
            print('a')

        except Exception:
            # Keep the original catch-all dialog, but let KeyboardInterrupt
            # and SystemExit propagate and log the real cause so failures
            # unrelated to user input are not hidden.
            traceback.print_exc()
            msgbox.showerror('Error', 'plese check the following things:\n'
                                      '* all values are in brackets\n'
                                      '* all values are separated by commas\n'
                                      '* make shure that freqency_list and powers are set\n'
                                      'example:\n'
                                      '\t[1.25, 25, 1.235e+3, 1.74e-6]')
Esempio n. 4
0
#  exp.convert_data(strain)

# Strain selection: each assignment overrides the previous one, so only the
# last line (['N2']) takes effect.
strains = exp.strains
strains = ['tdc-1', 'daf-7', 'tph-1']
strains = ['N2']

strain = 'N2'
feat = 'roam'

# Figure name: the second assignment overrides the first, leaving save_fig
# set to None.
save_fig = exp.figname('2016_12_20')
save_fig = None

for strain in strains:
    ### Load worms

    data = exp.load_data(strain)
    nworms = data.nworms

    ### Stage time distribution

    # Indices that put data.total_time in ascending order.
    ssort = np.argsort(data.total_time)

    # Figure 1 is cleared on every iteration; the sorted total times go into
    # the first panel of a 3 x 2 subplot grid.
    fig = plt.figure(1)
    plt.clf()
    plt.subplot(3, 2, 1)
    plt.plot(np.array(data.total_time)[ssort])
    #for s in range(data.nstages-1):
    #  o = np.argsort(data.stage_durations[:,s]);
    #  plt.plot(np.array(data.total_time)[o]);
    plt.title('%s %s - total time' % (strain, feat))
# PCA plot of dt with the last entry along axis 1 dropped, drawn into a
# cleared figure 412 and saved as a PDF in fig_directory.
fig = plt.figure(412)
plt.clf()
fplt.plot_pca(dt[:, :-1])

fig.savefig(os.path.join(fig_directory, 'stage_durations_pca.pdf'))


#%% Get stage durations from Roaming Dwelling data set

strain = 'N2'

rd_data = rexp.load_data(strain)

rd_stage_ids = rd_data.stage_switch
rd_stage_dur = rd_data.stage_durations

#%% Get stage durations from automatic detection

# Durations are the differences between consecutive entries along axis 1.
xy_stage_ids = np.load(
    os.path.join(exp.data_directory, 'transitions_times.npy'))
xy_stage_dur = np.diff(xy_stage_ids, axis=1)


#%% Plot Stage Durations

fig = plt.figure(1)
plt.clf()
# 3.0 * 60 * 60 = 10800.0; the unit is not stated in this excerpt.
rate = 3.0 * 60 * 60

#%% Load / Prepare data

# Per-strain result containers; every dict below is keyed by strain name.
data = {}
dat_bin = {}
dat_mean = {}
dat_var = {}
dat = {}
stage_bins = {}
dat_bin_s = {}

for strain in strains:
    print 'processing %s...' % strain

    data[strain] = exp.load_data(strain)
    # Select the attribute named by feat from the loaded data set.
    dat[strain] = getattr(data[strain], feat)

    # Bins computed with nbins=sbins (sbins is defined outside this excerpt).
    sbinsb = exp.stage_bins(data[strain], nbins=sbins)

    # Binned data plus its mean and variance along axis 0.
    dat_bin[strain] = exp.bin_data(dat[strain], sbinsb)
    dat_mean[strain] = np.mean(dat_bin[strain], axis=0)
    dat_var[strain] = np.var(dat_bin[strain], axis=0)

    # Same binning again with nbins=1; these bins are kept per strain.
    stage_bins[strain] = exp.stage_bins(data[strain], nbins=1)
    dat_bin_s[strain] = exp.bin_data(dat[strain], stage_bins[strain])

#%% Order by activity

order = {}
for strain in strains:
# PCA plot of dt with the last entry along axis 1 dropped, drawn into a
# cleared figure 412 and saved as a PDF in fig_directory.
fig = plt.figure(412)
plt.clf()
fplt.plot_pca(dt[:, :-1])

fig.savefig(os.path.join(fig_directory, 'stage_durations_pca.pdf'))


#%% Get stage durations from Roaming Dwelling data set

strain = 'N2'

rd_data = rexp.load_data(strain)

rd_stage_ids = rd_data.stage_switch
rd_stage_dur = rd_data.stage_durations

#%% Get stage durations from automatic detection

# Durations are the differences between consecutive entries along axis 1.
xy_stage_ids = np.load(
    os.path.join(exp.data_directory, 'transitions_times.npy'))
xy_stage_dur = np.diff(xy_stage_ids, axis=1)


#%% Plot Stage Durations

fig = plt.figure(1)
plt.clf()
# 3.0 * 60 * 60 = 10800.0; the unit is not stated in this excerpt.
rate = 3.0 * 60 * 60
# Map each entry of xy_name to the index of its match in rd_name;
# entries without a match keep the initial value -1.
xy_to_rd = -np.ones(len(xy_name), dtype=int)
for i in range(len(xy_name)):
    pos = np.nonzero(rd_name == xy_name[i])[0]
    if len(pos) > 0:
        # pos is an index array: store its first element as a scalar.
        # Assigning the whole array raises when there are several matches
        # and relies on deprecated array-to-scalar conversion otherwise.
        xy_to_rd[i] = pos[0]

# correct for restarted exp 20/21

### Get stage durations from Roaming Dwelling data set

# The working directory is changed before the import below.
# NOTE(review): presumably so that `experiment` resolves to the module
# located in dir_roaming -- confirm; do not reorder these two statements.
os.chdir(dir_roaming)
import experiment as exprd

strain = 'N2'

rd_data = exprd.load_data(strain)
# Sanity check: the first dimension of stage_durations must match rd_name.
assert rd_data.stage_durations.shape[0] == rd_name.shape[0]

rd_stage_ids = rd_data.stage_switch
rd_stage_dur = rd_data.stage_durations

### Get stage durations from automatic detection

# Durations are the differences between consecutive entries along axis 1
# of the loaded transition times.
xy_stage_ids = np.load(
    os.path.join(exp.experiment_directory, 'transitions_times.npy'))
xy_stage_dur = np.diff(xy_stage_ids, axis=1)

### Plot Stage Durations

# Start from a cleared figure 1.
plt.figure(1)
plt.clf()
# Pick up edits to the exp module and report its base directory.
reload(exp)
print('working at %s' % exp.base_directory)

#%% Load data sets and compare speed

import scripts.preprocessing.filenames as f

strain = 'daf7'
nworms, exp_names, dir_names = f.filenames(strain=strain)


#%%

# Two spellings of the strain name are in use: straind is passed to dexp,
# strain to exp.
straind = 'daf-7'
strain = 'daf7'

# Speeds from dexp, with a copy capped at the 95th percentile
# (np.nanpercentile ignores NaNs).
rd_data = dexp.load_data(strain=straind)
rd_speed = rd_data.speed
rd_speed_th = rd_speed.copy()
th = np.nanpercentile(rd_speed, 95)
rd_speed_th[rd_speed_th > th] = th

fplt.plot_array(rd_speed_th)


# One speed trace per worm; ntimes is the length of the longest trace.
v = []
for wid in range(nworms):
    v.append(exp.load(strain=strain, dtype='speed', wid=wid, memmap=None))
ntimes = max([len(vv) for vv in v])


#v = [];
Esempio n. 10
0
# -*- coding: utf-8 -*-
"""
Created on Thu Oct 13 01:22:12 2016

@author: ckirst
"""

import experiment as exp
import plot as fplt

# Load the N2 data set, then pass it through exp.add_positions.
data = exp.load_data('N2')
data = exp.add_positions(data)

# Plot positions against roam for the entry selected by wid.
wid = 0
fplt.plot_trace(data.positions[wid], data.roam[wid])
import numpy as np
import pandas as pd

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score
from sklearn.cross_validation import train_test_split
from sklearn.preprocessing import LabelEncoder

from gensim.models.word2vec import Word2Vec

from experiment import FeatureStacker, WordEmbeddings, Windower, load_data
from experiment import include_features


# Command line: argv[1] is handed to Word2Vec.load, argv[2] to load_data.
model = Word2Vec.load(sys.argv[1])
X, y = load_data(sys.argv[2])

# Split document indices with 20% held out for testing and a fixed seed.
# The same index range is passed twice, so the X_* and y_* index lists
# select the same documents.
X_train_idx, X_test_idx, y_train_idx, y_test_idx = train_test_split(
    range(len(X)), range(len(X)), test_size=0.2, random_state=2014
)
# X stays grouped per document; the labels in y are flattened into one list.
X_train_docs = [X[i] for i in X_train_idx]
y_train_docs = [label for i in y_train_idx for label in y[i]]
X_test_docs = [X[i] for i in X_test_idx]
y_test_docs = [label for i in y_test_idx for label in y[i]]


# Feature-set combinations: the four base sets, the same four with
# "embeddings" appended, and "embeddings" alone -- nine in total.
experiments = [("word",), ("word", "pos"), ("word", "pos", "root"), ("word", "pos", "root", "rel")]
experiments = experiments + [experiment + ("embeddings",) for experiment in experiments]
experiments += [("embeddings",)]

# One column per experiment and 10 rows.
# NOTE(review): presumably one row per repetition or fold -- confirm.
scores = np.zeros((10, len(experiments)))
import seaborn as sb
import numpy as np
import pandas as pd

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score
from sklearn.cross_validation import train_test_split
from sklearn.preprocessing import LabelEncoder

from gensim.models.word2vec import Word2Vec

from experiment import FeatureStacker, WordEmbeddings, Windower, load_data
from experiment import include_features

# Command line: argv[1] is handed to Word2Vec.load, argv[2] to load_data.
model = Word2Vec.load(sys.argv[1])
X, y = load_data(sys.argv[2])

# Split document indices with 20% held out for testing and a fixed seed.
# The same index range is passed twice, so the X_* and y_* index lists
# select the same documents.
X_train_idx, X_test_idx, y_train_idx, y_test_idx = train_test_split(
    range(len(X)), range(len(X)), test_size=0.2, random_state=2014)
# X stays grouped per document; the labels in y are flattened into one list.
X_train_docs = [X[i] for i in X_train_idx]
y_train_docs = [label for i in y_train_idx for label in y[i]]
X_test_docs = [X[i] for i in X_test_idx]
y_test_docs = [label for i in y_test_idx for label in y[i]]

# Feature-set combinations: the four base sets, the same four with
# 'embeddings' appended, and 'embeddings' alone -- nine in total.
experiments = [('word', ), ('word', 'pos'), ('word', 'pos', 'root'),
               ('word', 'pos', 'root', 'rel')]
experiments = experiments + [
    experiment + ('embeddings', ) for experiment in experiments
]
experiments += [('embeddings', )]
# Re-import the exp module and report its base directory.
reload(exp)
print 'working at %s' % exp.base_directory

#%% Load data sets and compare speed

import scripts.preprocessing.filenames as f

strain = 'daf7'
nworms, exp_names, dir_names = f.filenames(strain=strain)

#%%

# Two spellings of the strain name are in use: straind is passed to dexp,
# strain to exp.
straind = 'daf-7'
strain = 'daf7'

rd_data = dexp.load_data(strain=straind)
rd_speed = rd_data.speed
# Cap a copy of the speeds at the 95th percentile (np.nanpercentile
# ignores NaNs); rd_speed itself is left untouched.
rd_speed_th = rd_speed.copy()
th = np.nanpercentile(rd_speed, 95)
rd_speed_th[rd_speed_th > th] = th

fplt.plot_array(rd_speed_th)

# One speed trace per worm; ntimes is the length of the longest trace.
v = []
for wid in range(nworms):
    v.append(exp.load(strain=strain, dtype='speed', wid=wid, memmap=None))
ntimes = max([len(vv) for vv in v])

#v = [];
#for wid in range(nworms):
#  print '%d / %d' % (wid, nworms);