def add_guild(self, key):
    found = next((x for x in self._guilds if x.api_key == key), None)
    if found:
        return found.gid
    else:
        tmp = load_data(key, self._sort_items)
        if not hasattr(tmp, 'room'):
            raise InvalidPermissionException
        tmp.api_key = key
        self._guilds.append(tmp)
        return tmp.gid
Example #2
def main():

    # load training and validation data (function inside lib.py)
    trPC, vaPC, SC = load_data(band)

    for i in range(0,10):

        manualSeed = i
        np.random.seed(manualSeed)
        random.seed(manualSeed)
        torch.manual_seed(manualSeed)
        # if you are using a GPU
        torch.cuda.manual_seed(manualSeed)
        torch.cuda.manual_seed_all(manualSeed)

        model_name = '{}_batch{}_lr{}_midi{}_{}_sample{}_chunksize{}_{}_{}{}_score{}_NORM_{}'.format(loss_func, batch_size, lr, midi_op, \
                                                                                process_collate, sample_num, chunk_size, \
                                                                                model_choose, band, split, score_choose, manualSeed)
        #'Similarity_batch16_lr0.001_midialigneds_windowChunk1sample10sec_CNN'

        print('batch_size: {}, num_workers: {}, epoch: {}, lr: {}, model_name: {}'.format(batch_size, num_workers, epoch, lr, model_name))
        print('band: {}, feat: {}, midi_op: {}'.format(band, feat, midi_op))

        #check_missing_alignedmidi(band, feat, midi_op)

        # model saving path
        from datetime import date
        today = date.today()
        out_model_fn = './model/%d%d%d/%s/' % (today.year, today.month, today.day, model_name)
        if not os.path.exists(out_model_fn):
            os.makedirs(out_model_fn)

        # prepare dataloader (function inside lib.py)
        t_kwargs = {'batch_size': batch_size, 'num_workers': num_workers, 'shuffle': shuffle, 'pin_memory': True,'drop_last': True}
        v_kwargs = {'batch_size': batch_size, 'num_workers': num_workers, 'pin_memory': True}
        # worker_init_fn must be a callable; calling np.random.seed() inline would
        # pass None and leave the workers unseeded
        tr_loader = torch.utils.data.DataLoader(Data2Torch([trPC, SC], midi_op),
                                                worker_init_fn=lambda _: np.random.seed(manualSeed),
                                                collate_fn=partial(my_collate, [process_collate, sample_num, chunk_size]),
                                                **t_kwargs)
        va_loader = torch.utils.data.DataLoader(Data2Torch([vaPC, SC], midi_op),
                                                worker_init_fn=lambda _: np.random.seed(manualSeed),
                                                collate_fn=partial(my_collate, [process_collate, sample_num, chunk_size]),
                                                **v_kwargs)  # validation uses v_kwargs (no shuffle/drop_last)

        # build model (function inside model.py)
        model = JointEmbedNet(model_choose)
        if torch.cuda.is_available():
            model.cuda()

        # start training (function inside train_utils.py)
        Trer = Trainer(model, lr, epoch, out_model_fn)
        Trer.fit(tr_loader, va_loader)

        print(model_name)
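
A minimal sketch factoring the per-iteration seeding boilerplate used in this example (and in Example #5) into one helper; it assumes numpy, random and torch are imported as in the example:

def set_seed(seed):
    # seed every RNG the training loop touches
    np.random.seed(seed)
    random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        # only relevant when running on a GPU
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)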
Example #3
def __init__(self, sort_items):
    self._translator = Translator('en')
    self._guilds = []
    self._filters = []
    self._sort_items = sort_items
    with open("data/guilds.json", "r") as f:
        keys = json.load(f)
        for k in keys:
            tmp = load_data(k, self._sort_items)
            tmp.api_key = k
            self._guilds.append(tmp)
    self.add_filter(dummy_filter)
    self.current_filter = self._filters[0]
    self.current_guild = None
Example #4
def download_data():
    pool = multiprocessing.Pool(processes=NUM_PROCESSES)
    users_df, _ = load_data()

    if not os.path.exists(PHOTOS_PATH):
        os.makedirs(PHOTOS_PATH)
    existing_photos = [
        int(x.split('.')[0]) for x in os.listdir(PHOTOS_PATH)
        if not x.startswith('.')
    ]

    users_df.drop(existing_photos, inplace=True)
    urls = users_df['Photo'].values
    users_ids = users_df.index.values
    list(
        tqdm(pool.imap_unordered(process_url, zip(users_ids, urls)),
             total=len(users_ids)))
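
The pool in this example is never closed; a minimal sketch of the same download call with the pool used as a context manager, so its workers are cleaned up automatically (process_url, NUM_PROCESSES, users_ids and urls as defined above):

# Sketch: identical imap_unordered/tqdm loop, pool closed automatically on exit.
with multiprocessing.Pool(processes=NUM_PROCESSES) as pool:
    list(
        tqdm(pool.imap_unordered(process_url, zip(users_ids, urls)),
             total=len(users_ids)))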
Example #5
def main():

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    for i in range(4,10):
        manualSeed = i
        np.random.seed(manualSeed)
        random.seed(manualSeed)
        torch.manual_seed(manualSeed)
        # if you are using a GPU
        torch.cuda.manual_seed(manualSeed)
        torch.cuda.manual_seed_all(manualSeed)

        model_name = '{}_{}_batch{}_lr{}_{}_{}_isNorm{}_{}'.format(model_choose, matrix_dim, batch_size, lr, band, split, isNorm, manualSeed)

        print('range: {}, batch_size: {}, num_workers: {}, epoch: {}, lr: {}, model_name: {}'.format(i,batch_size, num_workers, epoch,
                                                                                        lr, model_name))
        print('band: {}, split: {}, matrix_dim: {}'.format(band, split, matrix_dim))

        # model saving path
        from datetime import date
        today = date.today()
        out_model_fn = './model/%d%d%d/%s/' % (today.year, today.month, today.day, model_name)
        if not os.path.exists(out_model_fn):
            os.makedirs(out_model_fn)

        # load training and validation data (function inside lib.py)
        trPC, vaPC = load_data(band)
       
        # prepare dataloader (function inside lib.py)
        t_kwargs = {'batch_size': batch_size, 'shuffle': shuffle, 'pin_memory': True,'drop_last': True}
        v_kwargs = {'batch_size': batch_size, 'pin_memory': True}
        # worker_init_fn must be a callable, not the None returned by calling np.random.seed() inline
        tr_loader = torch.utils.data.DataLoader(Data2Torch([trPC]), worker_init_fn=lambda _: np.random.seed(manualSeed), **t_kwargs)
        va_loader = torch.utils.data.DataLoader(Data2Torch([vaPC]), worker_init_fn=lambda _: np.random.seed(manualSeed), **v_kwargs)
        
        # build model (function inside model.py)
        model = DistMatNet(model_name)
        model.to(device)
        
        # start training (function inside train_utils.py)
        Trer = Trainer(model, lr, epoch, out_model_fn)
        Trer.fit(tr_loader, va_loader, device)

        print(model_name)
Example #6
def data(request):
    return load_data(request.param)
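
The request.param fixture above would normally carry a @pytest.fixture(params=...) decorator; a minimal self-contained sketch under that assumption (the dataset names are placeholders, and load_data is imported from lib as in the other examples):

import pytest
from lib import load_data  # assumed import, as elsewhere in these examples

@pytest.fixture(params=["dataset_a", "dataset_b"])  # placeholder dataset names
def data(request):
    # the fixture (and any test that uses it) runs once per param value
    return load_data(request.param)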
Example #7
def test_output_shape():
    data = load_data("dataset_a")
    assert transform(data).shape == (10, 1)
Example #8
rn.seed(RANDOM_STATE)
tf.random.set_seed(RANDOM_STATE)

colors = ["#000000", "#222222", "#444444", "#666666", "#888888", "#aaaaaa"]
makers = ["o", "v", "^", "*", "s", "p"]
markerfacecolor = '#ffffff'
plt.figure(figsize=(10, 10))
instance_types = [
    "m3.large", "m5.2xlarge", "m5.large", "m5.xlarge", "r3.xlarge",
    "r5d.xlarge"
]

for it_index in range(len(instance_types)):
    TARGET_TYPE = instance_types[it_index]

    df = lib.load_data(DATA_PATH, TARGET_TYPE)

    df, mean, std = lib.normalize(df)
    (x_train, y_train), (x_test, y_test), columns = lib.train_test_split_lstm(
        df["price"].values, df.index, PAST_HISTORY, TRAIN_RATIO)
    # define the model
    model = create_model(x_train.shape[-2:])
    # train the model
    model.fit(x_train,
              y_train,
              batch_size=BATCH_SIZE,
              epochs=EPOCHS,
              verbose=1,
              validation_data=(x_test, y_test))

    df_10days = lib.load_data(DATA_PATH_ACTUAL, TARGET_TYPE)
Example #9
print(
    "-----------------------------------------------------------------------------"
)
print("Use -d FILENAME in command line to set dataset to FILENAME")
print(
    "Use -c CONSTANT in command line to set uncertianty constant to value CONSTANT"
)
print("Use -m MODE in command line to set multiplication mode to value MODE")
print(
    "Use -o FILENAME in command line to set the output filenames to FILENAME")
print("use -t FLOAT in command line to set the convergence threshold")
print(
    "-----------------------------------------------------------------------------"
)

data = load_data(options.data)
print("dataset loaded...")

# Convert data into opinion matrix
print("Convert dataset to Opinion Matrix...")
P = get_opinion(data, options.constant)

# print(P)
# Set plus, times operations with respect to EBSL mode
plus = oplus

if (options.mode & 4) == 4:
    print("otimes")
    times = otimes
else:
    if (options.mode & 1) == 0:
Example #10
def data():
    print("\n✨✨✨ setUp")
    return load_data("dataset_a")
Example #11
from lib import load_data
import pandas as pd
import numpy as np

_, finder_decisions = load_data()

users_df = pd.read_pickle('users_df')
finder_decisions = finder_decisions.merge(users_df['feature'].to_frame(),
                                          how='left',
                                          left_on='Receiver_id',
                                          right_index=True)
like_preferences = finder_decisions[finder_decisions['Decision'] ==
                                    'like'].groupby(
                                        'Sender_id')['feature'].apply(np.mean)
skip_preferences = finder_decisions[finder_decisions['Decision'] ==
                                    'skip'].groupby(
                                        'Sender_id')['feature'].apply(np.mean)
users_df['like_preferences'] = like_preferences
users_df['skip_preferences'] = skip_preferences

users_df.to_pickle('users_df')
# for id in users_df.index.values:
Example #12
import matplotlib.pyplot as plt
import pandas
from numpy import array
from pandas.plotting import register_matplotlib_converters

from learn import exp_window

register_matplotlib_converters()

from lib import normalize, load_data, ALPHA, ALT_SPEED_MEAN, ALT_SPEED_MIN, SPEED_MEAN, SPEED_MIN, ALT_SPEED_MAX, \
    SPEED_MAX, user_events_to_data

user_events_style = {"color": "red", "alpha": 0.33}

(gps, alt, user) = load_data("part_1")

if __name__ == '__main__':
    x = gps['altitude']
    # x = alt["relativeAltitude"]
    smoothed = x.ewm(alpha=ALPHA).mean()
    norm_v_speed = normalize(smoothed.diff(),
                             mean=ALT_SPEED_MEAN,
                             min=ALT_SPEED_MIN,
                             max=ALT_SPEED_MAX)
    norm_speed = normalize(gps['speed'].ewm(alpha=ALPHA).mean(),
                           mean=SPEED_MEAN,
                           min=SPEED_MIN,
                           max=SPEED_MAX)

    normalize(smoothed).plot(label="smoothed")
Example #13
def today():
    return lib.load_data(COURSES_PATH).today()
Example #14
def day(name):
    return lib.load_data(COURSES_PATH).day(name)
Example #15
def data():
    print("\n✨✨✨ data setUp")
    yield load_data("dataset_a")
    print("✨✨✨ data tearDown")
Example #16
def main(model_name_e):

    train_metrics, val_metrics, test_metrics = [], [], []
    test_metrics_AltoSax, test_metrics_BbClarinet, test_metrics_Flute = [], [], []

    trPC, vaPC = load_data(band)
    tePC = load_test_data(band)
    teAltoSaxPC = load_test_data(band, 'Alto Saxophone')
    teBbClarinetPC = load_test_data(band, 'Bb Clarinet')
    teFlutePC = load_test_data(band, 'Flute')

    kwargs = {'batch_size': batch_size, 'pin_memory': True}
    #kwargs = {'pin_memory': True}
    tr_loader = torch.utils.data.DataLoader(Data2Torch([trPC]), **kwargs)
    va_loader = torch.utils.data.DataLoader(Data2Torch([vaPC]), **kwargs)
    te_loader = torch.utils.data.DataLoader(Data2Torch([tePC]), **kwargs)

    te_AltoSax_loader = torch.utils.data.DataLoader(Data2Torch([teAltoSaxPC]),
                                                    **kwargs)
    te_BbClarinet_loader = torch.utils.data.DataLoader(
        Data2Torch([teBbClarinetPC]), **kwargs)
    te_Flute_loader = torch.utils.data.DataLoader(Data2Torch([teFlutePC]),
                                                  **kwargs)

    print(model_name_e)

    eval_metrics = dict()
    for i in range(0, 10):
        if True:
            model_name = model_name_e + '_' + str(i)

            model_path = './model/' + model_name + '/model'
            # build model (function inside model.py)
            model = DistMatNet(model_name)
            if torch.cuda.is_available():
                model.cuda()
            model.load_state_dict(torch.load(model_path)['state_dict'])
            model.eval()

            for j in [1, 4, 7]:
                model.model.conv[j].bn1.momentum = 0
                model.model.conv[j].bn2.momentum = 0
                model.model.conv[j].bn3.momentum = 0
                model.model.conv[j].bn1.track_running_stats = False
                model.model.conv[j].bn2.track_running_stats = False
                model.model.conv[j].bn3.track_running_stats = False

            print('model :', model_name)
            tr = evaluate_model(model, tr_loader)
            print('train metrics', tr)
            va = evaluate_model(model, va_loader)
            print('valid metrics', va)
            te = evaluate_model(model, te_loader)
            print('test metrics', te)
            print('--------------------------------------------------')
            te_AltoSax = evaluate_model(model, te_AltoSax_loader)
            te_BbClarinet = evaluate_model(model, te_BbClarinet_loader)
            te_Flute = evaluate_model(model, te_Flute_loader)

            train_metrics.append(tr)
            val_metrics.append(va)
            test_metrics.append(te)

            test_metrics_AltoSax.append(te_AltoSax)
            test_metrics_BbClarinet.append(te_BbClarinet)
            test_metrics_Flute.append(te_Flute)

            print(tr, va, te, te_AltoSax, te_BbClarinet, te_Flute)
            eval_metrics[i] = (tr, va, te, te_AltoSax, te_BbClarinet, te_Flute)

    eval_metrics['avg'] = (sum(train_metrics) / len(train_metrics),
                           sum(val_metrics) / len(val_metrics),
                           sum(test_metrics) / len(test_metrics),
                           sum(test_metrics_AltoSax) /
                           len(test_metrics_AltoSax),
                           sum(test_metrics_BbClarinet) /
                           len(test_metrics_BbClarinet),
                           sum(test_metrics_Flute) / len(test_metrics_Flute))

    print("{:.3f}, {:.3f}, {:.3f}" .format(sum(train_metrics) / len(train_metrics), \
                                           sum(val_metrics) / len(val_metrics), sum(test_metrics) / len(test_metrics)))

    with open('result/' + model_name_e.split('/')[1] + '.json',
              'w') as outfile:
        json.dump(eval_metrics, outfile)
Example #17
import pandas as pd
import numpy as np
from lib import load_data
#from tqdm import tqdm
from sklearn.preprocessing import StandardScaler
import datetime
from scipy.stats import multivariate_normal, norm

config = {'only_click_touch_type': False, 'standard_scale': False}

data, data_valid, labels = load_data(valid_size=0.2)
data.drop(data[data['Fraud_reasons'] == 'mix'].index, inplace=True)
data_valid.drop(data_valid[data_valid['Fraud_reasons'] == 'mix'].index,
                inplace=True)

data.loc[data['Fraud_reasons'].isnull(), 'Fraud_reasons'] = 'ok'
data_valid.loc[data_valid['Fraud_reasons'].isnull(), 'Fraud_reasons'] = 'ok'
fraud_reasons = data['Fraud_reasons'].unique()

data['install_time'] = pd.to_datetime(data['install_time'])
data_valid['install_time'] = pd.to_datetime(data_valid['install_time'])
data['attributed_touch_time'] = pd.to_datetime(data['attributed_touch_time'])
data_valid['attributed_touch_time'] = pd.to_datetime(
    data_valid['attributed_touch_time'])
data['contributor_1_touch_time'] = pd.to_datetime(
    data['contributor_1_touch_time'], errors='coerce')
data['contributor_2_touch_time'] = pd.to_datetime(
    data['contributor_2_touch_time'], errors='coerce')
data['contributor_3_touch_time'] = pd.to_datetime(
    data['contributor_3_touch_time'], errors='coerce')
data_valid['contributor_1_touch_time'] = pd.to_datetime(
Example #18
def setUp(self):  # Runs before each test
    print("\n✨✨✨ setUp")
    self.data = load_data("dataset_a")
Example #19
    subprocess.Popen(['xdg-open', src])


def quit_app(icon) -> None:
    icon.stop()


def get_submenu(info: dict[str, str]) -> list[i]:
    allowed = ['website']
    return [
        i('Web page', lambda: open_webpage(v)) for k, v in info.items()
        if k in allowed
    ]


data = load_data(COURSES_PATH)
items = lambda: [
    i(d.describe(), m(*get_submenu(d.course))) for d in data.today().entries
]
items_or_default = lambda: items() if len(items()) else [i('(empty)', do_nothing)]

tray = pystray.Icon('Unischedule')
tray.icon = icon_image


def setup(icon):
    icon.menu = (
        *items_or_default(),
        m.SEPARATOR,
        i('Update', lambda: update_icon()),
Example #20
def refresh_guilds(self):
    for idx, g in enumerate(self._guilds):
        if data_need_reload(g.api_key):
            # rebinding the loop variable would not update the list, so write back by index
            refreshed = load_data(g.api_key, self._sort_items)
            refreshed.api_key = g.api_key
            self._guilds[idx] = refreshed
Example #21
def main():
    train_metrics, val_metrics, test_metrics = [], [], []
    test_metrics_AltoSax, test_metrics_BbClarinet, test_metrics_Flute = [], [], []

    trPC, vaPC, SC = load_data(band, feat, midi_op)
    tePC = load_test_data(band, feat)
    teAltoSaxPC = load_test_data(band, feat, 'Alto Saxophone')
    teBbClarinetPC = load_test_data(band, feat, 'Bb Clarinet')
    teFlutePC = load_test_data(band, feat, 'Flute')

    print(len(teAltoSaxPC), len(teBbClarinetPC), len(teFlutePC))

    kwargs = {'num_workers': num_workers, 'pin_memory': True}
    tr_loader = torch.utils.data.DataLoader(Data2Torch([trPC, SC], midi_op), \
                                            collate_fn=partial(test_collate, [overlap_flag, chunk_size]), **kwargs)
    va_loader = torch.utils.data.DataLoader(Data2Torch([vaPC, SC], midi_op), \
                                            collate_fn=partial(test_collate, [overlap_flag, chunk_size]), **kwargs)
    te_loader = torch.utils.data.DataLoader(Data2Torch([tePC, SC], midi_op),
                                            collate_fn=partial(
                                                test_collate,
                                                [overlap_flag, chunk_size]),
                                            **kwargs)

    te_AltoSax_loader = torch.utils.data.DataLoader(
        Data2Torch([teAltoSaxPC, SC], midi_op),
        collate_fn=partial(test_collate, [overlap_flag, chunk_size]),
        **kwargs)
    te_BbClarinet_loader = torch.utils.data.DataLoader(
        Data2Torch([teBbClarinetPC, SC], midi_op),
        collate_fn=partial(test_collate, [overlap_flag, chunk_size]),
        **kwargs)
    te_Flute_loader = torch.utils.data.DataLoader(
        Data2Torch([teFlutePC, SC], midi_op),
        collate_fn=partial(test_collate, [overlap_flag, chunk_size]),
        **kwargs)

    eval_metrics = dict()
    for i in range(0, 10):
        model_name = '202059/Similarity_batch32_lr0.05_midialigned_s_{}_sample2_chunksize{}_{}_{}{}_score{}_NORM_' \
                     .format(process_collate, chunk_size, model_choose, band, split, score_choose) + str(i)

        #model_name = model_choose + '_' + str(i)
        # if resize the midi to fit the length of audio
        resample = False
        if midi_op == 'resize':
            resample = True

        model_path = './model/' + model_name + '/model'
        model = JointEmbedNet(model_choose)
        if torch.cuda.is_available():
            model.cuda()
        model.load_state_dict(torch.load(model_path)['state_dict'])
        tr = evaluate_model(model, tr_loader)
        va = evaluate_model(model, va_loader)
        te = evaluate_model(model, te_loader)

        te_AltoSax = evaluate_model(model, te_AltoSax_loader)
        te_BbClarinet = evaluate_model(model, te_BbClarinet_loader)
        te_Flute = evaluate_model(model, te_Flute_loader)

        train_metrics.append(tr)
        val_metrics.append(va)
        test_metrics.append(te)

        test_metrics_AltoSax.append(te_AltoSax)
        test_metrics_BbClarinet.append(te_BbClarinet)
        test_metrics_Flute.append(te_Flute)

        print(tr, va, te, te_AltoSax, te_BbClarinet, te_Flute)
        eval_metrics[i] = (tr, va, te, te_AltoSax, te_BbClarinet, te_Flute)

    eval_metrics['avg'] = (sum(train_metrics) / len(train_metrics),
                           sum(val_metrics) / len(val_metrics),
                           sum(test_metrics) / len(test_metrics),
                           sum(test_metrics_AltoSax) /
                           len(test_metrics_AltoSax),
                           sum(test_metrics_BbClarinet) /
                           len(test_metrics_BbClarinet),
                           sum(test_metrics_Flute) / len(test_metrics_Flute))

    model_n = "Similarity_batch32_lr0.05_midialigned_s_{}_sample2_chunksize{}_{}".format(
        process_collate, chunk_size, model_choose)
    results_dir = './results'

    results_fp = os.path.join(
        results_dir,
        model_n + f'{band}{split}{score_choose}_results_ins_dict.json')
    if not os.path.exists(os.path.dirname(results_fp)):
        os.makedirs(os.path.dirname(results_fp))
    with open(results_fp, 'w') as outfile:
        json.dump(eval_metrics, outfile, indent=2)
    print(len(train_metrics), len(val_metrics), len(test_metrics))
    print('model :', model_name)
    print('train metrics', sum(train_metrics) / len(train_metrics))
    print('valid metrics', sum(val_metrics) / len(val_metrics))
    print('test metrics', sum(test_metrics) / len(test_metrics))

    print('test metrics AltoSax',
          sum(test_metrics_AltoSax) / len(test_metrics_AltoSax))
    print('test metrics BbClarinet',
          sum(test_metrics_BbClarinet) / len(test_metrics_BbClarinet))
    print('test metrics Flute',
          sum(test_metrics_Flute) / len(test_metrics_Flute))

    print('--------------------------------------------------')
Example #22
result = []

colors = ["red", "royalblue", "violet", "green", "cyan", "orange"]
instance_types = [
    "m3.large", "m5.2xlarge", "m5.large", "m5.xlarge", "r3.xlarge",
    "r5d.xlarge"
]
feature_importance = []
fig = plt.figure(figsize=(16, 9))

for i in range(len(instance_types)):
    TARGET_TYPE = instance_types[i]
    print("=" * 10, TARGET_TYPE, "=" * 10)

    df = lib.load_data(DATA_PATH, TARGET_TYPE)
    df, mean, std = lib.normalize(df)

    (x_train, y_train), (x_test, y_test), columns = lib.train_test_split_lstm(
        df["price"].values, df.index, PAST_HISTORY, TRAIN_RATIO)
    # define the model
    model = create_model(x_train.shape[-2:])
    # train the model
    hist = model.fit(x_train,
                     y_train,
                     batch_size=BATCH_SIZE,
                     epochs=EPOCHS,
                     verbose=1,
                     validation_data=(x_test, y_test))
    y_pred = model.predict(x_test)
Example #23
replicates, s1col, s2col, toleratedNA, annotation, species, idtype, input, output, dolog2 = load_parameters()

# check variables for shift analysis
if len(s1col) == 1 or len(s2col) == 1:
    print('Limma analysis cannot be done (no multiple samples per condition).')
    time.sleep(3)
    shiftan = False
else:
    shiftan = True
    shiftcol = [str(col + 1) for col in s1col + s2col]
    shiftcol = ','.join(shiftcol)
    control = ','.join(['0'] * len(s1col) + ['1'] * len(s2col))
    treated = ','.join(['1'] * len(s1col) + ['0'] * len(s2col))

# load data
content = load_data(input)

# check for replicates, tolerated missing data and collapse replicates for every sample
content2 = ctc_replicates(replicates, content, toleratedNA, s1col, s2col, dolog2)

# automatic protein grouping
if annotation == 'automatic':
    out = open('auto_cnv_input.txt', 'w')
    for line in content2:
        out.write('\t'.join(line) + '\n')
    out.close()
    temp = open('templates/auto_cnv.r').read()
    if shiftan:
        temp += open('templates/shift_analysis.r').read()
        temp = temp.replace('<insertcolumns>', shiftcol)
        temp = temp.replace('<insertcontrol>', control)
        temp = temp.replace('<inserttreated>', treated)
    temp = temp.replace('<insertspecieshere>', species)
    temp = temp.replace('<insertfilterhere>', idtype)
    temp = temp.replace('output', output)
    outr = open('auto_cnv.r', 'w')
    outr.write(temp)
    outr.close()
Example #24
import keras.backend as K
from keras.models import load_model
import json
import time

import imgaug as ia
from imgaug import augmenters as iaa

if lib.isnotebook():
    from keras_tqdm import TQDMNotebookCallback as KerasCallBack
else:
    from keras_tqdm import TQDMCallback as KerasCallBack

# In[16]:

data = lib.load_data()

# In[155]:

seq = iaa.Sequential([
    iaa.Fliplr(0.5),
    iaa.Sometimes(0.5, [
        iaa.Crop(px=(0, 12)),
        iaa.Pad(px=4, pad_mode=ia.ALL, pad_cval=(0, 255))
    ]),
    iaa.Sometimes(0.15, iaa.Dropout(p=0.05))
],
                     random_order=True)

# In[156]:
Example #25
def train():
    users, finder_decisions = load_data()
    # users_df = pd.read_pickle('users_df')
    # users_df = users_df[(~users_df['feature'].isnull()) ]
    # finder_decisions = finder_decisions.merge(users_df['feature'].to_frame(), how='left', left_on='Receiver_id',
    #                                           right_index=True)
    # finder_decisions.drop(finder_decisions.index[finder_decisions['feature'].isnull()],inplace=True)
    # finder_decisions['Decision'] = finder_decisions['Decision'] == 'like'
    #
    # print(finder_decisions['Sender_id'].value_counts())
    # # finder_decisions = finder_decisions[finder_decisions['Sender_id'].value_counts() > 100]
    # data = finder_decisions[finder_decisions['Sender_id'] == 3023001477]
    # data_train, data_valid = train_test_split(data, test_size=0.5)
    # neigh = KNeighborsClassifier(n_neighbors=3)
    # neigh.fit(np.stack(data_train['feature']), data_train['Decision'])
    # predictions = neigh.predict(np.stack(data_valid['feature']))
    # print((predictions == data_valid['Decision']).sum()/len(predictions))

    # users_df = users_df[(~users_df['like_preferences'].isnull()) & (~users_df['skip_preferences'].isnull()) & (~users_df['feature'].isnull()) ]
    # finder_decisions = finder_decisions.merge(users_df[['like_preferences', 'skip_preferences']], how='right', left_on='Sender_id',
    #                                           right_index=True)
    # finder_decisions = finder_decisions.merge(users_df['feature'].to_frame(), how='right', left_on='Receiver_id',
    #                                           right_index=True)
    # finder_decisions = finder_decisions[(~finder_decisions['like_preferences'].isnull()) & (~finder_decisions['skip_preferences'].isnull()) & (~finder_decisions['feature'].isnull()) ]
    #
    # finder_decisions['Decision'] = finder_decisions['Decision'] == 'like'

    # for i in range(len(finder_decisions)):
    #     like_similarity = cosine_similarity(finder_decisions.iloc[i]['feature'].reshape(1,-1), finder_decisions.iloc[i]['like_preferences'].reshape(1,-1))
    #     skip_similarity = cosine_similarity(finder_decisions.iloc[i]['feature'].reshape(1, -1),
    #                                     finder_decisions.iloc[i]['skip_preferences'].reshape(1, -1))

    # finder_decisions = finder_decisions.merge(users_df['preferences'].to_frame(), how='left', left_on='Sender_id',
    #                                           right_index=True)
    # finder_decisions = finder_decisions.merge(users_df['feature'].to_frame(), how='left', left_on='Receiver_id',
    #                                           right_index=True)
    # finder_decisions.drop(finder_decisions.index[finder_decisions['preferences'].isnull()], inplace=True)

    zz = users.index.unique()[:20000]
    users.drop(users.index[~users.index.isin(zz)], inplace=True)
    finder_decisions.drop(
        finder_decisions.index[~finder_decisions['Sender_id'].isin(zz)],
        inplace=True)
    finder_decisions.drop(
        finder_decisions.index[~finder_decisions['Receiver_id'].isin(zz)],
        inplace=True)
    sender_value_counts = finder_decisions['Sender_id'].value_counts()
    finder_decisions.drop(
        finder_decisions.index[finder_decisions['Sender_id'].isin(
            sender_value_counts.index[sender_value_counts == 1])],
        inplace=True)
    # users.drop(users.index[~users.index.isin(np.union1d(finder_decisions['Sender_id'].values, finder_decisions['Receiver_id'].values))], inplace=True)
    users['index'] = np.arange(len(users))
    finder_decisions = finder_decisions.merge(users,
                                              how='left',
                                              left_on='Sender_id',
                                              right_index=True)
    finder_decisions.rename(columns={
        'age': 'Sender_age',
        'gender': 'Sender_gender',
        'index': 'Sender_index'
    },
                            inplace=True)
    finder_decisions = finder_decisions.merge(users,
                                              how='left',
                                              left_on='Receiver_id',
                                              right_index=True)
    finder_decisions.rename(columns={
        'age': 'Receiver_age',
        'gender': 'Receiver_gender',
        'index': 'Receiver_index'
    },
                            inplace=True)
    finder_decisions['Decision'] = finder_decisions['Decision'] == 'like'

    # n_users = len(users)
    # finder_decisions_train, finder_decisions_valid =  train_test_split(finder_decisions, stratify=finder_decisions['Decision'])
    # finder_decisions_train, finder_decisions_valid = train_test_split(finder_decisions, stratify=finder_decisions['Sender_index'], test_size=0.2)
    finder_decisions_train, finder_decisions_valid = train_test_split(
        finder_decisions,
        test_size=0.2,
        stratify=finder_decisions['Sender_index'])

    # d_train = lgb.Dataset(finder_decisions_train['like_preferences'], label=y_train)
    # params = {}
    # params['learning_rate'] = 0.003
    # params['boosting_type'] = 'gbdt'
    # params['objective'] = 'binary'
    # params['metric'] = 'binary_logloss'
    # params['sub_feature'] = 0.5
    # params['num_leaves'] = 10
    # params['min_data'] = 50
    # params['max_depth'] = 10
    # clf = lgb.train(params, d_train, 100)

    model = get_model_embedding(len(users))
    # model = load_model('model.model')
    model.fit([
        finder_decisions_train['Sender_index'].values,
        finder_decisions_train['Receiver_index'].values
    ],
              finder_decisions_train['Decision'].values,
              epochs=100,
              verbose=1,
              batch_size=BATCH_SIZE,
              validation_data=([
                  finder_decisions_valid['Sender_index'].values,
                  finder_decisions_valid['Receiver_index'].values
              ], finder_decisions_valid['Decision'].values),
              callbacks=get_callbacks())
Example #26
def test_output_type():
    data = load_data("dataset_a")
    assert isinstance(transform(data), pd.DataFrame)
Example #27
# coding=utf-8
import sys

import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import pandas
from matplotlib.widgets import TextBox, Button
from pandas.plotting import register_matplotlib_converters

from lib import normalize, load_data, user_events_to_data, KW_GOING_HELI, KW_BACKWARD, widget_log

register_matplotlib_converters()

user_events_style = {"color": "red", "alpha": 0.33}
file_name = sys.argv[1]
(gps, alt, user2) = load_data(file_name)
user_marks_holder = [user2]

if __name__ == '__main__':
    x = gps['altitude']
    smoothed = x.ewm(alpha=0.1).mean()
    norm_speed = normalize(gps['speed'].ewm(alpha=0.03).mean())

    charts_ax = normalize(smoothed).plot(label="smoothed")
    norm_speed.plot(label="speed", alpha=0.3)

    plt.gca().xaxis.set_major_locator(mdates.HourLocator())
    plt.gca().xaxis.set_major_formatter(
        mdates.DateFormatter('%H:%M'))  # %Y-%m-%d %H:%M
    error_text = TextBox(plt.axes([0.04, 0.005, 0.8, 0.04]),
                         'Error',
Example #28
def data():
    print("\n✨✨✨ setUp")
    yield load_data("dataset_a")
    print("✨✨✨ tearDown")
    delete_data()
Example #29
                                                n_features * 2),
                            random_state=42,
                            max_iter=1200)

        clf.fit(x_train, y_train)

        y_pred = clf.predict(x_test)
        print "Accuracy score: %s" % accuracy_score(y_test, y_pred)
        return clf
    except:
        print "Unexpected error:", sys.exc_info()[0]


if __name__ == '__main__':
    # alt['relativeAltitude']
    (gps_train, alt_train, user_train) = load_data("part_1")
    (gps_test, alt_test, user_test) = load_data("part_2")

    # X = X1 + X2
    # y = y1 + y2

    # x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=27)

    experiment_id = 0
    for alpha in (0.001, 0.005, 0.01, 0.015):
        for const_norm in [True]:
            for use_data_perc in (0.5, 0.75):
                for start_second in [80, 100, 120]:
                    for count in [16, 70, 90]:
                        for (wfn, wfn_params) in (
                            (exp_window, {