def add_guild(self, key):
    found = next((x for x in self._guilds if x.api_key == key), None)
    if found:
        return found.gid
    tmp = load_data(key, self._sort_items)
    if not hasattr(tmp, 'room'):
        raise InvalidPermissionException
    tmp.api_key = key
    self._guilds.append(tmp)
    return tmp.gid
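
# The method above raises InvalidPermissionException, which is defined
# elsewhere in the source; a minimal sketch of what such a class could
# look like (an assumption, not the project's actual definition):
class InvalidPermissionException(Exception):
    """Raised when loaded guild data lacks the expected 'room' attribute."""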
def main():
    # load training and validation data (function inside lib.py)
    trPC, vaPC, SC = load_data(band)

    for i in range(0, 10):
        manualSeed = i
        np.random.seed(manualSeed)
        random.seed(manualSeed)
        torch.manual_seed(manualSeed)
        # if you are using GPU
        torch.cuda.manual_seed(manualSeed)
        torch.cuda.manual_seed_all(manualSeed)

        model_name = '{}_batch{}_lr{}_midi{}_{}_sample{}_chunksize{}_{}_{}{}_score{}_NORM_{}'.format(
            loss_func, batch_size, lr, midi_op, process_collate, sample_num,
            chunk_size, model_choose, band, split, score_choose, manualSeed)
        # e.g. 'Similarity_batch16_lr0.001_midialigneds_windowChunk1sample10sec_CNN'

        print('batch_size: {}, num_workers: {}, epoch: {}, lr: {}, model_name: {}'.format(
            batch_size, num_workers, epoch, lr, model_name))
        print('band: {}, feat: {}, midi_op: {}'.format(band, feat, midi_op))
        # check_missing_alignedmidi(band, feat, midi_op)

        # model saving path
        from datetime import date
        date = date.today()
        out_model_fn = './model/%d%d%d/%s/' % (date.year, date.month, date.day, model_name)
        if not os.path.exists(out_model_fn):
            os.makedirs(out_model_fn)

        # prepare dataloader (function inside lib.py)
        t_kwargs = {'batch_size': batch_size, 'num_workers': num_workers,
                    'shuffle': shuffle, 'pin_memory': True, 'drop_last': True}
        v_kwargs = {'batch_size': batch_size, 'num_workers': num_workers,
                    'pin_memory': True}
        # worker_init_fn must be a callable, so wrap the seeding in a lambda
        tr_loader = torch.utils.data.DataLoader(
            Data2Torch([trPC, SC], midi_op),
            worker_init_fn=lambda _: np.random.seed(manualSeed),
            collate_fn=partial(my_collate, [process_collate, sample_num, chunk_size]),
            **t_kwargs)
        va_loader = torch.utils.data.DataLoader(
            Data2Torch([vaPC, SC], midi_op),
            worker_init_fn=lambda _: np.random.seed(manualSeed),
            collate_fn=partial(my_collate, [process_collate, sample_num, chunk_size]),
            **v_kwargs)

        # build model (function inside model.py)
        model = JointEmbedNet(model_choose)
        if torch.cuda.is_available():
            model.cuda()

        # start training (function inside train_utils.py)
        Trer = Trainer(model, lr, epoch, out_model_fn)
        Trer.fit(tr_loader, va_loader)

        print(model_name)
def __init__(self, sort_items):
    self._translator = Translator('en')
    self._guilds = []
    self._filters = []
    self._sort_items = sort_items
    with open("data/guilds.json", "r") as f:
        keys = json.load(f)
    for k in keys:
        tmp = load_data(k, self._sort_items)
        tmp.api_key = k
        self._guilds.append(tmp)
    self.add_filter(dummy_filter)
    self.current_filter = self._filters[0]
    self.current_guild = None
def download_data():
    pool = multiprocessing.Pool(processes=NUM_PROCESSES)
    users_df, _ = load_data()
    if not os.path.exists(PHOTOS_PATH):
        os.makedirs(PHOTOS_PATH)
    existing_photos = [
        int(x.split('.')[0]) for x in os.listdir(PHOTOS_PATH)
        if not x.startswith('.')
    ]
    users_df.drop(existing_photos, inplace=True)
    urls = users_df['Photo'].values
    users_ids = users_df.index.values
    list(
        tqdm(pool.imap_unordered(process_url, zip(users_ids, urls)),
             total=len(users_ids)))
def main():
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    for i in range(4, 10):
        manualSeed = i
        np.random.seed(manualSeed)
        random.seed(manualSeed)
        torch.manual_seed(manualSeed)
        # if you are using GPU
        torch.cuda.manual_seed(manualSeed)
        torch.cuda.manual_seed_all(manualSeed)

        model_name = '{}_{}_batch{}_lr{}_{}_{}_isNorm{}_{}'.format(
            model_choose, matrix_dim, batch_size, lr, band, split, isNorm, manualSeed)
        print('range: {}, batch_size: {}, num_workers: {}, epoch: {}, lr: {}, model_name: {}'.format(
            i, batch_size, num_workers, epoch, lr, model_name))
        print('band: {}, split: {}, matrix_dim: {}'.format(band, split, matrix_dim))

        # model saving path
        from datetime import date
        date = date.today()
        out_model_fn = './model/%d%d%d/%s/' % (date.year, date.month, date.day, model_name)
        if not os.path.exists(out_model_fn):
            os.makedirs(out_model_fn)

        # load training and validation data (function inside lib.py)
        trPC, vaPC = load_data(band)

        # prepare dataloader (function inside lib.py)
        t_kwargs = {'batch_size': batch_size, 'shuffle': shuffle,
                    'pin_memory': True, 'drop_last': True}
        v_kwargs = {'batch_size': batch_size, 'pin_memory': True}
        # worker_init_fn must be a callable, so wrap the seeding in a lambda
        tr_loader = torch.utils.data.DataLoader(
            Data2Torch([trPC]),
            worker_init_fn=lambda _: np.random.seed(manualSeed),
            **t_kwargs)
        va_loader = torch.utils.data.DataLoader(
            Data2Torch([vaPC]),
            worker_init_fn=lambda _: np.random.seed(manualSeed),
            **v_kwargs)

        # build model (function inside model.py)
        model = DistMatNet(model_name)
        model.to(device)

        # start training (function inside train_utils.py)
        Trer = Trainer(model, lr, epoch, out_model_fn)
        Trer.fit(tr_loader, va_loader, device)

        print(model_name)
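
# The per-run seeding block is repeated verbatim in both training scripts
# above; a small helper that could consolidate it (a sketch, relying on the
# same numpy/random/torch imports those scripts already use):
def set_seed(seed):
    """Seed numpy, random, and torch (CPU and GPU) for reproducible runs."""
    np.random.seed(seed)
    random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)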
def data(request):
    return load_data(request.param)
def test_output_shape():
    data = load_data("dataset_a")
    assert transform(data).shape == (10, 1)
rn.seed(RANDOM_STATE)
tf.random.set_seed(RANDOM_STATE)

colors = ["#000000", "#222222", "#444444", "#666666", "#888888", "#aaaaaa"]
makers = ["o", "v", "^", "*", "s", "p"]
markerfacecolor = '#ffffff'

plt.figure(figsize=(10, 10))

instance_types = [
    "m3.large", "m5.2xlarge", "m5.large", "m5.xlarge", "r3.xlarge", "r5d.xlarge"
]
for it_index in range(len(instance_types)):
    TARGET_TYPE = instance_types[it_index]
    df = lib.load_data(DATA_PATH, TARGET_TYPE)
    df, mean, std = lib.normalize(df)
    (x_train, y_train), (x_test, y_test), columns = lib.train_test_split_lstm(
        df["price"].values, df.index, PAST_HISTORY, TRAIN_RATIO)

    # Define the model
    model = create_model(x_train.shape[-2:])

    # Train the model
    model.fit(x_train,
              y_train,
              batch_size=BATCH_SIZE,
              epochs=EPOCHS,
              verbose=1,
              validation_data=(x_test, y_test))

    df_10days = lib.load_data(DATA_PATH_ACTUAL, TARGET_TYPE)
print("-----------------------------------------------------------------------------")
print("Use -d FILENAME in command line to set dataset to FILENAME")
print("Use -c CONSTANT in command line to set uncertainty constant to value CONSTANT")
print("Use -m MODE in command line to set multiplication mode to value MODE")
print("Use -o FILENAME in command line to set the output filenames to FILENAME")
print("Use -t FLOAT in command line to set the convergence threshold")
print("-----------------------------------------------------------------------------")

data = load_data(options.data)
print("dataset loaded...")

# Convert data into opinion matrix
print("Convert dataset to Opinion Matrix...")
P = get_opinion(data, options.constant)
# print(P)

# Set plus, times operations with respect to EBSL mode
plus = oplus
if (options.mode & 4) == 4:
    print("otimes")
    times = otimes
else:
    if (options.mode & 1) == 0:
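
# A sketch of the option parser implied by the usage text above. The
# attribute names used in the snippet (options.data, options.constant,
# options.mode) pin down -d, -c, and -m; the names chosen here for -o
# and -t (output, threshold) are assumptions:
from optparse import OptionParser

parser = OptionParser()
parser.add_option("-d", dest="data", help="dataset filename")
parser.add_option("-c", dest="constant", type="float", help="uncertainty constant")
parser.add_option("-m", dest="mode", type="int", help="multiplication mode")
parser.add_option("-o", dest="output", help="output filename prefix")
parser.add_option("-t", dest="threshold", type="float", help="convergence threshold")
options, args = parser.parse_args()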
def data():
    print("\n✨✨✨ setUp")
    return load_data("dataset_a")
from lib import load_data
import pandas as pd
import numpy as np

_, finder_decisions = load_data()
users_df = pd.read_pickle('users_df')
finder_decisions = finder_decisions.merge(users_df['feature'].to_frame(),
                                          how='left',
                                          left_on='Receiver_id',
                                          right_index=True)
like_preferences = finder_decisions[finder_decisions['Decision'] == 'like'].groupby(
    'Sender_id')['feature'].apply(np.mean)
skip_preferences = finder_decisions[finder_decisions['Decision'] == 'skip'].groupby(
    'Sender_id')['feature'].apply(np.mean)
users_df['like_preferences'] = like_preferences
users_df['skip_preferences'] = skip_preferences
users_df.to_pickle('users_df')

# for id in users_df.index.values:
import matplotlib.pyplot as plt
import pandas
from numpy import array
from pandas.plotting import register_matplotlib_converters

from learn import exp_window

register_matplotlib_converters()

from lib import normalize, load_data, ALPHA, ALT_SPEED_MEAN, ALT_SPEED_MIN, SPEED_MEAN, SPEED_MIN, ALT_SPEED_MAX, \
    SPEED_MAX, user_events_to_data

user_events_style = {"color": "red", "alpha": 0.33}

(gps, alt, user) = load_data("part_1")

if __name__ == '__main__':
    x = gps['altitude']
    # x = alt["relativeAltitude"]
    smoothed = x.ewm(alpha=ALPHA).mean()
    norm_v_speed = normalize(smoothed.diff(),
                             mean=ALT_SPEED_MEAN,
                             min=ALT_SPEED_MIN,
                             max=ALT_SPEED_MAX)
    norm_speed = normalize(gps['speed'].ewm(alpha=ALPHA).mean(),
                           mean=SPEED_MEAN,
                           min=SPEED_MIN,
                           max=SPEED_MAX)
    normalize(smoothed).plot(label="smoothed")
def today():
    return lib.load_data(COURSES_PATH).today()
def day(name):
    return lib.load_data(COURSES_PATH).day(name)
def data():
    print("\n✨✨✨ data setUp")
    yield load_data("dataset_a")
    print("✨✨✨ data tearDown")
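
# A minimal sketch of a test consuming the yield-style fixture above,
# assuming it is registered with @pytest.fixture (the decorator is not
# shown in these snippets); the setUp print runs before the test body
# and the tearDown print after it:
def test_uses_data_fixture(data):
    assert data is not None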
def main(model_name_e):
    train_metrics, val_metrics, test_metrics = [], [], []
    test_metrics_AltoSax, test_metrics_BbClarinet, test_metrics_Flute = [], [], []

    trPC, vaPC = load_data(band)
    tePC = load_test_data(band)
    teAltoSaxPC = load_test_data(band, 'Alto Saxophone')
    teBbClarinetPC = load_test_data(band, 'Bb Clarinet')
    teFlutePC = load_test_data(band, 'Flute')

    kwargs = {'batch_size': batch_size, 'pin_memory': True}
    # kwargs = {'pin_memory': True}
    tr_loader = torch.utils.data.DataLoader(Data2Torch([trPC]), **kwargs)
    va_loader = torch.utils.data.DataLoader(Data2Torch([vaPC]), **kwargs)
    te_loader = torch.utils.data.DataLoader(Data2Torch([tePC]), **kwargs)
    te_AltoSax_loader = torch.utils.data.DataLoader(Data2Torch([teAltoSaxPC]), **kwargs)
    te_BbClarinet_loader = torch.utils.data.DataLoader(Data2Torch([teBbClarinetPC]), **kwargs)
    te_Flute_loader = torch.utils.data.DataLoader(Data2Torch([teFlutePC]), **kwargs)

    print(model_name_e)

    eval_metrics = dict()
    for i in range(0, 10):
        model_name = model_name_e + '_' + str(i)
        model_path = './model/' + model_name + '/model'

        # build model (function inside model.py)
        model = DistMatNet(model_name)
        if torch.cuda.is_available():
            model.cuda()
        model.load_state_dict(torch.load(model_path)['state_dict'])
        model.eval()
        for j in [1, 4, 7]:
            model.model.conv[j].bn1.momentum = 0
            model.model.conv[j].bn2.momentum = 0
            model.model.conv[j].bn3.momentum = 0
            model.model.conv[j].bn1.track_running_stats = False
            model.model.conv[j].bn2.track_running_stats = False
            model.model.conv[j].bn3.track_running_stats = False

        print('model :', model_name)
        tr = evaluate_model(model, tr_loader)
        print('train metrics', tr)
        va = evaluate_model(model, va_loader)
        print('valid metrics', va)
        te = evaluate_model(model, te_loader)
        print('test metrics', te)
        print('--------------------------------------------------')

        te_AltoSax = evaluate_model(model, te_AltoSax_loader)
        te_BbClarinet = evaluate_model(model, te_BbClarinet_loader)
        te_Flute = evaluate_model(model, te_Flute_loader)

        train_metrics.append(tr)
        val_metrics.append(va)
        test_metrics.append(te)
        test_metrics_AltoSax.append(te_AltoSax)
        test_metrics_BbClarinet.append(te_BbClarinet)
        test_metrics_Flute.append(te_Flute)
        print(tr, va, te, te_AltoSax, te_BbClarinet, te_Flute)

        eval_metrics[i] = (tr, va, te, te_AltoSax, te_BbClarinet, te_Flute)

    eval_metrics['avg'] = (sum(train_metrics) / len(train_metrics),
                           sum(val_metrics) / len(val_metrics),
                           sum(test_metrics) / len(test_metrics),
                           sum(test_metrics_AltoSax) / len(test_metrics_AltoSax),
                           sum(test_metrics_BbClarinet) / len(test_metrics_BbClarinet),
                           sum(test_metrics_Flute) / len(test_metrics_Flute))

    print("{:.3f}, {:.3f}, {:.3f}".format(sum(train_metrics) / len(train_metrics),
                                          sum(val_metrics) / len(val_metrics),
                                          sum(test_metrics) / len(test_metrics)))

    # write the collected per-run and average metrics to disk
    with open('result/' + model_name_e.split('/')[1] + '.json', 'w') as outfile:
        json.dump(eval_metrics, outfile)
import pandas as pd
import numpy as np
from lib import load_data
# from tqdm import tqdm
from sklearn.preprocessing import StandardScaler
import datetime
from scipy.stats import multivariate_normal, norm

config = {'only_click_touch_type': False, 'standard_scale': False}

data, data_valid, labels = load_data(valid_size=0.2)

data.drop(data[data['Fraud_reasons'] == 'mix'].index, inplace=True)
data_valid.drop(data_valid[data_valid['Fraud_reasons'] == 'mix'].index, inplace=True)
data.loc[data['Fraud_reasons'].isnull(), 'Fraud_reasons'] = 'ok'
data_valid.loc[data_valid['Fraud_reasons'].isnull(), 'Fraud_reasons'] = 'ok'
fraud_reasons = data['Fraud_reasons'].unique()

data['install_time'] = pd.to_datetime(data['install_time'])
data_valid['install_time'] = pd.to_datetime(data_valid['install_time'])
data['attributed_touch_time'] = pd.to_datetime(data['attributed_touch_time'])
data_valid['attributed_touch_time'] = pd.to_datetime(
    data_valid['attributed_touch_time'])
data['contributor_1_touch_time'] = pd.to_datetime(
    data['contributor_1_touch_time'], errors='coerce')
data['contributor_2_touch_time'] = pd.to_datetime(
    data['contributor_2_touch_time'], errors='coerce')
data['contributor_3_touch_time'] = pd.to_datetime(
    data['contributor_3_touch_time'], errors='coerce')
data_valid['contributor_1_touch_time'] = pd.to_datetime(
def setUp(self):
    # Runs before each test
    print("\n✨✨✨ setUp")
    self.data = load_data("dataset_a")
    subprocess.Popen(['xdg-open', src])


def quit_app(icon) -> None:
    icon.stop()


def get_submenu(info: dict[str, str]) -> list[i]:
    allowed = ['website']
    return [
        i('Web page', lambda: open_webpage(v))
        for k, v in info.items() if k in allowed
    ]


data = load_data(COURSES_PATH)
items = lambda: [
    i(d.describe(), m(*get_submenu(d.course))) for d in data.today().entries
]
items_or_default = lambda: items() if len(items()) else [i('(empty)', do_nothing)]

tray = pystray.Icon('Unischedule')
tray.icon = icon_image


def setup(icon):
    icon.menu = (
        *items_or_default(),
        m.SEPARATOR,
        i('Update', lambda: update_icon()),
def refresh_guilds(self):
    for idx, g in enumerate(self._guilds):
        if data_need_reload(g.api_key):
            # assign back through the index; rebinding g would not
            # update the list
            self._guilds[idx] = load_data(g.api_key, self._sort_items)
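
# Why refresh_guilds assigns back through the index (a standalone
# illustration, not from the original source): rebinding the loop
# variable only changes the local name, never the list element.
xs = [1, 2, 3]
for x in xs:
    x = x * 10        # xs is still [1, 2, 3]
for idx, x in enumerate(xs):
    xs[idx] = x * 10  # xs is now [10, 20, 30]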
def main():
    train_metrics, val_metrics, test_metrics = [], [], []
    test_metrics_AltoSax, test_metrics_BbClarinet, test_metrics_Flute = [], [], []

    trPC, vaPC, SC = load_data(band, feat, midi_op)
    tePC = load_test_data(band, feat)
    teAltoSaxPC = load_test_data(band, feat, 'Alto Saxophone')
    teBbClarinetPC = load_test_data(band, feat, 'Bb Clarinet')
    teFlutePC = load_test_data(band, feat, 'Flute')
    print(len(teAltoSaxPC), len(teBbClarinetPC), len(teFlutePC))

    kwargs = {'num_workers': num_workers, 'pin_memory': True}
    tr_loader = torch.utils.data.DataLoader(
        Data2Torch([trPC, SC], midi_op),
        collate_fn=partial(test_collate, [overlap_flag, chunk_size]), **kwargs)
    va_loader = torch.utils.data.DataLoader(
        Data2Torch([vaPC, SC], midi_op),
        collate_fn=partial(test_collate, [overlap_flag, chunk_size]), **kwargs)
    te_loader = torch.utils.data.DataLoader(
        Data2Torch([tePC, SC], midi_op),
        collate_fn=partial(test_collate, [overlap_flag, chunk_size]), **kwargs)
    te_AltoSax_loader = torch.utils.data.DataLoader(
        Data2Torch([teAltoSaxPC, SC], midi_op),
        collate_fn=partial(test_collate, [overlap_flag, chunk_size]), **kwargs)
    te_BbClarinet_loader = torch.utils.data.DataLoader(
        Data2Torch([teBbClarinetPC, SC], midi_op),
        collate_fn=partial(test_collate, [overlap_flag, chunk_size]), **kwargs)
    te_Flute_loader = torch.utils.data.DataLoader(
        Data2Torch([teFlutePC, SC], midi_op),
        collate_fn=partial(test_collate, [overlap_flag, chunk_size]), **kwargs)

    eval_metrics = dict()
    for i in range(0, 10):
        model_name = '202059/Similarity_batch32_lr0.05_midialigned_s_{}_sample2_chunksize{}_{}_{}{}_score{}_NORM_' \
            .format(process_collate, chunk_size, model_choose, band, split, score_choose) + str(i)
        # model_name = model_choose + '_' + str(i)

        # if resize the midi to fit the length of audio
        resample = False
        if midi_op == 'resize':
            resample = True

        model_path = './model/' + model_name + '/model'
        model = JointEmbedNet(model_choose)
        if torch.cuda.is_available():
            model.cuda()
        model.load_state_dict(torch.load(model_path)['state_dict'])

        tr = evaluate_model(model, tr_loader)
        va = evaluate_model(model, va_loader)
        te = evaluate_model(model, te_loader)
        te_AltoSax = evaluate_model(model, te_AltoSax_loader)
        te_BbClarinet = evaluate_model(model, te_BbClarinet_loader)
        te_Flute = evaluate_model(model, te_Flute_loader)

        train_metrics.append(tr)
        val_metrics.append(va)
        test_metrics.append(te)
        test_metrics_AltoSax.append(te_AltoSax)
        test_metrics_BbClarinet.append(te_BbClarinet)
        test_metrics_Flute.append(te_Flute)
        print(tr, va, te, te_AltoSax, te_BbClarinet, te_Flute)

        eval_metrics[i] = (tr, va, te, te_AltoSax, te_BbClarinet, te_Flute)

    eval_metrics['avg'] = (sum(train_metrics) / len(train_metrics),
                           sum(val_metrics) / len(val_metrics),
                           sum(test_metrics) / len(test_metrics),
                           sum(test_metrics_AltoSax) / len(test_metrics_AltoSax),
                           sum(test_metrics_BbClarinet) / len(test_metrics_BbClarinet),
                           sum(test_metrics_Flute) / len(test_metrics_Flute))

    model_n = "Similarity_batch32_lr0.05_midialigned_s_{}_sample2_chunksize{}_{}".format(
        process_collate, chunk_size, model_choose)
    results_dir = './results'
    results_fp = os.path.join(
        results_dir, model_n + f'{band}{split}{score_choose}_results_ins_dict.json')
    if not os.path.exists(os.path.dirname(results_fp)):
        os.makedirs(os.path.dirname(results_fp))
    with open(results_fp, 'w') as outfile:
        json.dump(eval_metrics, outfile, indent=2)

    print(len(train_metrics), len(val_metrics), len(test_metrics))
    print('model :', model_name)
    print('train metrics', sum(train_metrics) / len(train_metrics))
    print('valid metrics', sum(val_metrics) / len(val_metrics))
    print('test metrics', sum(test_metrics) / len(test_metrics))
    print('test metrics AltoSax', sum(test_metrics_AltoSax) / len(test_metrics_AltoSax))
    print('test metrics BbClarinet', sum(test_metrics_BbClarinet) / len(test_metrics_BbClarinet))
    print('test metrics Flute', sum(test_metrics_Flute) / len(test_metrics_Flute))
    print('--------------------------------------------------')
result = []
colors = ["red", "royalblue", "violet", "green", "cyan", "orange"]
instance_types = [
    "m3.large", "m5.2xlarge", "m5.large", "m5.xlarge", "r3.xlarge", "r5d.xlarge"
]
feature_importance = []

fig = plt.figure(figsize=(16, 9))
for i in range(len(instance_types)):
    TARGET_TYPE = instance_types[i]
    print("=" * 10, TARGET_TYPE, "=" * 10)
    df = lib.load_data(DATA_PATH, TARGET_TYPE)
    df, mean, std = lib.normalize(df)
    (x_train, y_train), (x_test, y_test), columns = lib.train_test_split_lstm(
        df["price"].values, df.index, PAST_HISTORY, TRAIN_RATIO)

    # Define the model
    model = create_model(x_train.shape[-2:])

    # Train the model
    hist = model.fit(x_train,
                     y_train,
                     batch_size=BATCH_SIZE,
                     epochs=EPOCHS,
                     verbose=1,
                     validation_data=(x_test, y_test))

    y_pred = model.predict(x_test)
replicates, s1col, s2col, toleratedNA, annotation, species, idtype, input, output, dolog2 = load_parameters()

# check variables for shift analysis
if len(s1col) == 1 or len(s2col) == 1:
    print 'Limma analysis cannot be done (no multiple samples per condition).'
    time.sleep(3)
    shiftan = False
else:
    shiftan = True
    shiftcol = []
    for col in s1col + s2col:
        shiftcol.append(str(col + 1))
    shiftcol = string.join(shiftcol, ',')
    control = string.join(['0'] * len(s1col) + ['1'] * len(s2col), ',')
    treated = string.join(['1'] * len(s1col) + ['0'] * len(s2col), ',')

# load data
content = load_data(input)

# check for replicates, tolerated missing data and collapse replicates for every sample
content2 = ctc_replicates(replicates, content, toleratedNA, s1col, s2col, dolog2)

# automatic protein grouping
if annotation == 'automatic':
    out = open('auto_cnv_input.txt', 'w')
    for line in content2:
        out.write(string.join(line, '\t') + '\n')
    out.close()
    temp = open('templates/auto_cnv.r').read()
    if shiftan == True:
        temp += open('templates/shift_analysis.r').read()
        temp = temp.replace('<insertcolumns>', shiftcol)
        temp = temp.replace('<insertcontrol>', control)
        temp = temp.replace('<inserttreated>', treated)
    temp = temp.replace('<insertspecieshere>', species)
    temp = temp.replace('<insertfilterhere>', idtype)
    temp = temp.replace('output', output)
    outr = open('auto_cnv.r', 'w')
    outr.write(temp)
    outr.close()
import keras.backend as K
from keras.models import load_model
import json
import time
import imgaug as ia
from imgaug import augmenters as iaa

if lib.isnotebook():
    from keras_tqdm import TQDMNotebookCallback as KerasCallBack
else:
    from keras_tqdm import TQDMCallback as KerasCallBack

# In[16]:

data = lib.load_data()

# In[155]:

seq = iaa.Sequential([
    iaa.Fliplr(0.5),
    iaa.Sometimes(0.5, [
        iaa.Crop(px=(0, 12)),
        iaa.Pad(px=4, pad_mode=ia.ALL, pad_cval=(0, 255))
    ]),
    iaa.Sometimes(0.15, iaa.Dropout(p=0.05))
], random_order=True)

# In[156]:
def train():
    users, finder_decisions = load_data()
    # users_df = pd.read_pickle('users_df')
    # users_df = users_df[(~users_df['feature'].isnull())]
    # finder_decisions = finder_decisions.merge(users_df['feature'].to_frame(), how='left', left_on='Receiver_id',
    #                                           right_index=True)
    # finder_decisions.drop(finder_decisions.index[finder_decisions['feature'].isnull()], inplace=True)
    # finder_decisions['Decision'] = finder_decisions['Decision'] == 'like'
    # # print(finder_decisions['Sender_id'].value_counts())
    # # finder_decisions = finder_decisions[finder_decisions['Sender_id'].value_counts() > 100]
    # data = finder_decisions[finder_decisions['Sender_id'] == 3023001477]
    # data_train, data_valid = train_test_split(data, test_size=0.5)
    # neigh = KNeighborsClassifier(n_neighbors=3)
    # neigh.fit(np.stack(data_train['feature']), data_train['Decision'])
    # predictions = neigh.predict(np.stack(data_valid['feature']))
    # print((predictions == data_valid['Decision']).sum() / len(predictions))
    # users_df = users_df[(~users_df['like_preferences'].isnull()) & (~users_df['skip_preferences'].isnull()) & (~users_df['feature'].isnull())]
    # finder_decisions = finder_decisions.merge(users_df[['like_preferences', 'skip_preferences']], how='right', left_on='Sender_id',
    #                                           right_index=True)
    # finder_decisions = finder_decisions.merge(users_df['feature'].to_frame(), how='right', left_on='Receiver_id',
    #                                           right_index=True)
    # finder_decisions = finder_decisions[(~finder_decisions['like_preferences'].isnull()) & (~finder_decisions['skip_preferences'].isnull()) & (~finder_decisions['feature'].isnull())]
    # finder_decisions['Decision'] = finder_decisions['Decision'] == 'like'
    # for i in range(len(finder_decisions)):
    #     like_similarity = cosine_similarity(finder_decisions.iloc[i]['feature'].reshape(1, -1), finder_decisions.iloc[i]['like_preferences'].reshape(1, -1))
    #     skip_similarity = cosine_similarity(finder_decisions.iloc[i]['feature'].reshape(1, -1),
    #                                         finder_decisions.iloc[i]['skip_preferences'].reshape(1, -1))
    # finder_decisions = finder_decisions.merge(users_df['preferences'].to_frame(), how='left', left_on='Sender_id',
    #                                           right_index=True)
    # finder_decisions = finder_decisions.merge(users_df['feature'].to_frame(), how='left', left_on='Receiver_id',
    #                                           right_index=True)
    # finder_decisions.drop(finder_decisions.index[finder_decisions['preferences'].isnull()], inplace=True)

    zz = users.index.unique()[:20000]
    users.drop(users.index[~users.index.isin(zz)], inplace=True)
    finder_decisions.drop(
        finder_decisions.index[~finder_decisions['Sender_id'].isin(zz)],
        inplace=True)
    finder_decisions.drop(
        finder_decisions.index[~finder_decisions['Receiver_id'].isin(zz)],
        inplace=True)
    sender_value_counts = finder_decisions['Sender_id'].value_counts()
    finder_decisions.drop(
        finder_decisions.index[finder_decisions['Sender_id'].isin(
            sender_value_counts.index[sender_value_counts == 1])],
        inplace=True)
    # users.drop(users.index[~users.index.isin(np.union1d(finder_decisions['Sender_id'].values, finder_decisions['Receiver_id'].values))], inplace=True)

    users['index'] = np.arange(len(users))
    finder_decisions = finder_decisions.merge(users,
                                              how='left',
                                              left_on='Sender_id',
                                              right_index=True)
    finder_decisions.rename(columns={
        'age': 'Sender_age',
        'gender': 'Sender_gender',
        'index': 'Sender_index'
    }, inplace=True)
    finder_decisions = finder_decisions.merge(users,
                                              how='left',
                                              left_on='Receiver_id',
                                              right_index=True)
    finder_decisions.rename(columns={
        'age': 'Receiver_age',
        'gender': 'Receiver_gender',
        'index': 'Receiver_index'
    }, inplace=True)

    finder_decisions['Decision'] = finder_decisions['Decision'] == 'like'

    # n_users = len(users)
    # finder_decisions_train, finder_decisions_valid = train_test_split(finder_decisions, stratify=finder_decisions['Decision'])
    # finder_decisions_train, finder_decisions_valid = train_test_split(finder_decisions, stratify=finder_decisions['Sender_index'], test_size=0.2)
    finder_decisions_train, finder_decisions_valid = train_test_split(
        finder_decisions,
        test_size=0.2,
        stratify=finder_decisions['Sender_index'])

    # d_train = lgb.Dataset(finder_decisions_train['like_preferences'], label=y_train)
    # params = {}
    # params['learning_rate'] = 0.003
    # params['boosting_type'] = 'gbdt'
    # params['objective'] = 'binary'
    # params['metric'] = 'binary_logloss'
    # params['sub_feature'] = 0.5
    # params['num_leaves'] = 10
    # params['min_data'] = 50
    # params['max_depth'] = 10
    # clf = lgb.train(params, d_train, 100)

    model = get_model_embedding(len(users))
    # model = load_model('model.model')
    model.fit([
        finder_decisions_train['Sender_index'].values,
        finder_decisions_train['Receiver_index'].values
    ],
              finder_decisions_train['Decision'].values,
              epochs=100,
              verbose=1,
              batch_size=BATCH_SIZE,
              validation_data=([
                  finder_decisions_valid['Sender_index'].values,
                  finder_decisions_valid['Receiver_index'].values
              ], finder_decisions_valid['Decision'].values),
              callbacks=get_callbacks())
def test_output_type():
    data = load_data("dataset_a")
    assert isinstance(transform(data), pd.DataFrame)
# coding=utf-8
import sys

import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import pandas
from matplotlib.widgets import TextBox, Button
from pandas.plotting import register_matplotlib_converters

from lib import normalize, load_data, user_events_to_data, KW_GOING_HELI, KW_BACKWARD, widget_log

register_matplotlib_converters()

user_events_style = {"color": "red", "alpha": 0.33}

file_name = sys.argv[1]
(gps, alt, user2) = load_data(file_name)
user_marks_holder = [user2]

if __name__ == '__main__':
    x = gps['altitude']
    smoothed = x.ewm(alpha=0.1).mean()
    norm_speed = normalize(gps['speed'].ewm(alpha=0.03).mean())
    charts_ax = normalize(smoothed).plot(label="smoothed")
    norm_speed.plot(label="speed", alpha=0.3)
    plt.gca().xaxis.set_major_locator(mdates.HourLocator())
    plt.gca().xaxis.set_major_formatter(
        mdates.DateFormatter('%H:%M'))  # %Y-%m-%d %H:%M
    error_text = TextBox(plt.axes([0.04, 0.005, 0.8, 0.04]), 'Error',
def data():
    print("\n✨✨✨ setUp")
    yield load_data("dataset_a")
    print("✨✨✨ tearDown")
    delete_data()
                                           n_features * 2),
                            random_state=42,
                            max_iter=1200)
        clf.fit(x_train, y_train)
        y_pred = clf.predict(x_test)
        print "Accuracy score: %s" % accuracy_score(y_test, y_pred)
        return clf
    except:
        print "Unexpected error:", sys.exc_info()[0]


if __name__ == '__main__':
    # alt['relativeAltitude']
    (gps_train, alt_train, user_train) = load_data("part_1")
    (gps_test, alt_test, user_test) = load_data("part_2")
    # X = X1 + X2
    # y = y1 + y2
    # x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=27)

    experiment_id = 0
    for alpha in (0.001, 0.005, 0.01, 0.015):
        for const_norm in [True]:
            for use_data_perc in (0.5, 0.75):
                for start_second in [80, 100, 120]:
                    for count in [16, 70, 90]:
                        for (wfn, wfn_params) in (
                                (exp_window, {