def calculate_redundancy(question, questions):
    f = 0.0
    for i in questions:
        f = max(match(convert(question['body']), convert(i['body'])), f)
    return [f]

def runner(formula1, formula2):
    formula1 = forseti.parser.parse(formula1)
    formula2 = forseti.parser.parse(formula2)
    statement1, steps1 = util.convert(deepcopy(formula1))
    statement2, steps2 = util.convert(deepcopy(formula2))
    return statement1 == statement2, [statement1, steps1], [statement2, steps2]

def runner(formula1, formula2, adjacency):
    formula1 = forseti.parser.parse(formula1)
    formula2 = forseti.parser.parse(formula2)
    statement1, steps1, isContra1 = util.convert(deepcopy(formula1), adjacency)
    statement2, steps2, isContra2 = util.convert(deepcopy(formula2), adjacency)
    return (statement1 == statement2,
            [statement1, steps1, isContra1],
            [statement2, steps2, isContra2],
            formula1)

def render(self, scr):
    x, y = util.convert(self.row, self.col)
    l0, h0, l1, h1 = self.bounds
    self.bounds.topleft = [x - l1 / 2, y - h1 / 2]
    scr.blit(self.image, self.bounds)

def posts():
    """Display a list of published posts."""
    posts = app.db.session.query(Post).filter(Post.published)\
        .order_by(Post.published_dt.desc())
    page = convert(request.args.get('page'), int, 1)
    paginated_posts = posts.paginate(page=page, per_page=8)
    return render_template('/posts/index.tmpl', posts=paginated_posts)

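# Hedged sketch of the convert() helper assumed by the view above (not shown in
# the snippet): cast a query-string value, falling back to a default when the
# value is missing or malformed. A guess, not the project's actual helper.
def convert(value, cast, default):
    try:
        return cast(value)
    except (TypeError, ValueError):
        return default

# e.g. convert(None, int, 1) -> 1, convert('3', int, 1) -> 3
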
def augment_data(data_dir: str, specs_dir: str, file_format: str = 'wav',
                 file_names_path: str = None, wav_dir: str = None):
    """
    Augment data and generate spectrograms of audio files.

    :param data_dir: source directory of audio files
    :param specs_dir: output directory
    :param file_format: format of the source audio files (default=wav)
    :param file_names_path: path to a file listing the file names to process.
        If no file is provided, every file in the source directory is
        considered (default=None)
    :param wav_dir: output directory for audio files converted to wav format.
        If no path is provided, a temporary file is created and removed
        after processing (default=None)
    """
    if file_names_path is None:
        file_names = os.listdir(data_dir)
    else:
        file_names = open(file_names_path, 'r').readlines()

    for i, line in enumerate(file_names):
        file_path = line.split(',')[0]
        file_name = str(file_path[:-4])
        if wav_dir is None:
            # temporary file in the working directory, removed after processing
            wav_file_name = 'tmp.wav'
            out_path = wav_file_name
        else:
            wav_file_name = file_name + '.wav'
            out_path = wav_dir + '/' + wav_file_name
        try:
            if file_format != 'wav':
                convert(file_path, out_path)
            # augment_wav_data(out_path, specs_dir, file_name)  # TODO
        except RuntimeError:
            print('WARNING: file {} not converted, skipping'.format(file_path))
        if wav_dir is None:
            os.remove(wav_file_name)
    print("processed %d files" % (i + 1))

def details():
    if 'user_id' not in session:
        return redirect('/login')
    user, usernames = model.details(session['user_id'])
    return {
        'user': convert(user),
        'usernames': usernames
    }

def parse(str):
    ret_values = []
    default = None
    step_type = '+'
    step = 1
    if str.startswith('['):
        # split the list of values from the default value (if present)
        at_idx = str.find('@')
        (val, def_val) = (str, None) if at_idx == -1 else (str[:at_idx], str[at_idx + 1:])
        ret_values = [x.strip() for x in val[val.index('[') + 1:val.index(']')].split(',')]
        if def_val:
            default = def_val.strip()
    elif str.startswith('('):
        # this is a stride operator
        at_idx = str.find('@')
        (val, def_val) = (str, None) if at_idx == -1 else (str[:at_idx], str[at_idx + 1:])
        p = re.compile(r'\w+')
        v = p.findall(val)
        start = int(v[0])
        end = int(v[1])
        if len(v) == 3:
            p = re.compile(r'\*\w+')
            if p.findall(val):
                step_type = '*'
            step = int(v[2])
        # create the values
        curr = start
        while curr <= end:
            ret_values.append(curr)
            # advance by the stride: additive by default, multiplicative for '*'
            curr = curr * step if step_type == '*' else curr + step
        if def_val:
            default = def_val.strip()
    else:
        # this is a single value variable
        if str.startswith('s"'):
            str = str.replace('\\\n', '')[2:-1]  # remove initial 's"' and final '"'
        ret_values.append(str)
    # try to convert the values into float/int
    return (list(map(lambda x: convert(x), ret_values)), convert(default))

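# Minimal usage sketch for parse() above, assuming a hypothetical convert()
# helper that coerces numeric strings to int/float and passes everything else
# (including None) through unchanged. Not the project's actual convert().
def convert(value):
    if value is None:
        return None
    for cast in (int, float):
        try:
            return cast(value)
        except (TypeError, ValueError):
            pass
    return value

values, default = parse('[1, 2, 3] @ 2')   # -> [1, 2, 3], 2
values, default = parse('(1, 10, 2)')      # -> [1, 3, 5, 7, 9], None
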
def post_audio():
    mp3_file = flask.request.files['file']
    result = client.asr(util.mp3_to_pcm(mp3_file), 'wav', 16000, {
        'lan': 'zh',
    })
    asr_response = json.dumps(result, ensure_ascii=False)
    if 'result' in result:
        try:
            result['result'] = util.convert(result['result'][0])
            app.logger.info("first -" + asr_response + "second - [" + result['result'] + "]")
        except Exception as e:
            app.logger.error("first -" + asr_response + "second -[" + e.message + "]")
            result['result'] = ''

def reservations():
    result = []
    for reservation in many(Reservation):
        if validate(reservation):
            if reservation.occupied is False:
                spot = one(Spot, reservation.spot_id)
                reservation_dict = convert(reservation)
                reservation_dict['spot_location'] = spot.location
                result.append(reservation_dict)
        else:
            abort(reservation)
    return result

def from_scryfall(cls, data):
    printing_col_names = [c.name for c in Printing.__table__.columns]
    printing_data = util.convert(data, {'set': 'set_code'})
    try:
        if 'image_uris' in printing_data:
            printing_data['image_uri'] = printing_data['image_uris']['normal']
        else:
            printing_data['image_uri'] = None
    except KeyError:
        print(printing_data)
        raise
    printing_data = util.restriction(data, printing_col_names)
    printing_data['card'] = Card.from_scryfall(data)
    return Printing(**printing_data)

def save_label_txt(img_shape, img_label, save_file):
    """Convert label information to YOLO format and save it.

    :param img_shape: image shape (height, width, channels)
    :param img_label: list of labels
    :param save_file: output file name
    :return:
    """
    height, width, _ = img_shape
    label_file = open(save_file, 'w')
    for label in img_label:
        target_id, x1, y1, x2, y2 = label
        label_box = (float(x1), float(x2), float(y1), float(y2))
        label_yolo = util.convert((width, height), label_box)
        label_file.write(
            str(target_id) + " " + " ".join([str(a) for a in label_yolo]) + '\n')

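# Hedged sketch of the util.convert referenced above (assumed, not shown): the
# darknet-style normalization from a (xmin, xmax, ymin, ymax) pixel box to
# YOLO's (x_center, y_center, width, height) fractions of the image size.
def convert(size, box):
    dw = 1.0 / size[0]            # size = (image_width, image_height)
    dh = 1.0 / size[1]
    x = (box[0] + box[1]) / 2.0   # box = (xmin, xmax, ymin, ymax)
    y = (box[2] + box[3]) / 2.0
    w = box[1] - box[0]
    h = box[3] - box[2]
    return x * dw, y * dh, w * dw, h * dh
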
def start():
    piano_folder = "25-Piano-Soundfonts"
    piano_options = convert(os.listdir(piano_folder))
    pp.pprint(piano_options)

    selected_piano = None
    while selected_piano is None:
        piano_name = input("Enter Piano: ")
        selected_piano = piano_options.get(piano_name)

    SF2 = f"{piano_folder}/{selected_piano}"

    print("Left Hand")
    pp.pprint(qwerty_keys_to_standard.get('left'))
    print("Right Hand")
    pp.pprint(qwerty_keys_to_standard.get('right'))

    if not fluidsynth.init(SF2):
        print("Couldn't load soundfont", SF2)
        sys.exit(1)

def main():
    client = bigquery.Client('vdslab-covid19')
    table = client.get_table('twitter.test2')
    api = get_api()
    max_id = None
    while True:
        print(max_id)
        tweets = api.search('(コロナ OR covid19 OR 武漢肺炎) min_retweets:10',
                            lang='ja', locale='ja', result_type='recent',
                            count=1000, max_id=max_id)
        if len(tweets) == 0:
            break
        client.insert_rows(table, [convert(status._json) for status in tweets])
        max_id = tweets.max_id
        time.sleep(5)

def capture(self):
    if not self.valid:
        return self.empty

    # Read a frame.
    ret, frame = self.feed.read()
    if not ret:
        # Something went wrong with the capture.
        return self.empty

    # Resize the frame.
    frame = cv2.resize(frame, (CAM_WIDTH, CAM_HEIGHT))

    # Convert the frame to RGB.
    frame = util.convert(frame)

    return frame

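# Hedged sketch of the util.convert used on frames here and in tick() further
# below: OpenCV captures frames in BGR order, so the assumed helper simply
# swaps the channel order (the swap is symmetric, so it also works before
# cv2.imwrite). An assumption, not the project's actual helper.
import cv2

def convert(frame):
    return cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
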
def __init__(self, row, col, terrain=0, base=-1, crystals=0):
    pygame.sprite.Sprite.__init__(self)
    self.row = int(row)
    self.col = int(col)
    self.cx, self.cy = util.convert(self.row, self.col)
    self.base = int(base)
    self.crystals = int(crystals)
    self.terrain = self.TERRAIN_TYPE[int(terrain)]
    self.crystalImage = pygame.image.load(
        os.path.join(os.path.dirname(os.path.realpath(__file__)), 'assets/crystal.png'))
    if terrain == "3":
        self.image = pygame.image.load(
            os.path.join(os.path.dirname(os.path.realpath(__file__)), 'assets/rock.png'))
    else:
        self.image = None
    self.edges = (
        (self.cx, self.cy - self.HEIGHT),
        (self.cx + self.WIDTH, self.cy - self.HEIGHT / 2),
        (self.cx + self.WIDTH, self.cy + self.HEIGHT / 2),
        (self.cx, self.cy + self.HEIGHT),
        (self.cx - self.WIDTH, self.cy + self.HEIGHT / 2),
        (self.cx - self.WIDTH, self.cy - self.HEIGHT / 2),
    )

def main(file_name):
    # Instantiate the RSA helper and generate a key pair
    rsa = alg.rsa()
    keys_gen = rsa.generate_key_pair()
    session_key = get_random_bytes(16)  # random 128-bit session key

    # Convert the file contents from utf-8 to bytes
    convert = util.convert(file_name)
    text_to_bytes = convert.to_bytes()

    # Encrypt and then decrypt the session key with RSA
    key_encrypted = rsa.encrypt_key(session_key)
    key_decrypted = rsa.decrypt_key(key_encrypted)

    # Instantiate the AES-CBC helper
    aes = alg.aes_cbc(key_decrypted, text_to_bytes)
    encrypt_message = aes.encrypt_message()
    iv = encrypt_message[1]  # export the initialization vector

    # Decrypt the message again using the exported IV
    decrypt_message = aes.decrypt_message(iv)

    return encrypt_message[0], decrypt_message

for elem in e2:
    t = elem.find("div", class_="text")
    res_name = t.find("div", class_="text-cnt Restaurants").a.text
    res_cui = unescape(t.find("div", class_="text-cnt Restaurants").p.text)
    special = u"\u2022"
    res_cui = res_cui.replace(special, '@')
    res_cui = parseres_name(res_cui)
    stats = t.select(".text-stats")
    res_data["name"] = res_name
    res_data["cuisine"] = res_cui
    res_data["food"] = convert(t.select(".i-number.i-number-red")[0].text)
    res_data["decor"] = convert(t.select(".i-number")[0].text)
    res_data["service"] = convert(t.select(".i-number")[1].text)
    res_data["cost"] = convert(t.select(".i-number")[2].text)
    print res_data
    newd = res_data.copy()
    res_data_list.append(newd)

print res_data_list
print len(res_data_list)
writetofile(res_data_list)

def spots():
    refresh()
    return convert(many(Spot))

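# Hedged sketch of the convert() helper shared by these endpoint snippets
# (spots, garages, reservations, details): turn an ORM row, or a list of rows,
# into JSON-serializable dicts. Assumes SQLAlchemy-style models with a
# __table__ attribute; a guess, not the project's actual helper.
def convert(obj):
    if isinstance(obj, list):
        return [convert(o) for o in obj]
    return {c.name: getattr(obj, c.name) for c in obj.__table__.columns}
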
def callback(ch, method, properties, body):
    id_, datum_ = convert(body)
    update_jubatus(datum_)
    print "update succeeded (ID: " + str(id_) + ")"

# coding: utf-8
import util
import pandas as pd
import pickle

path = 'data/'
train = pd.read_csv(path + 'train.csv')
test = pd.read_csv(path + 'test.csv')

# convert attributes to wide tables
event_type = util.convert(pd.read_csv(path + 'event_type.csv'),
                          add_count=True, count_column='event_type_count')
# usecols - to control which columns to be parsed
log_feature = util.convert(pd.read_csv(path + 'log_feature.csv'),
                           fill='zero', add_count=True,
                           count_column='log_feature_count')
resource_type = util.convert(pd.read_csv(path + 'resource_type.csv'),
                             add_count=True, count_column='resource_type_count')
severity_type = util.convert(pd.read_csv(path + 'severity_type.csv'))
location = util.convert(pd.read_csv(path + 'location.csv'),
                        add_count=True, count_column='location_count')

# move id to the index, for merge purposes
event_type.set_index('id', inplace=True)
log_feature.set_index('id', inplace=True)
resource_type.set_index('id', inplace=True)
severity_type.set_index('id', inplace=True)
location.set_index('id', inplace=True)
train.set_index('id', inplace=True)
test.set_index('id', inplace=True)

temp = train.drop(['location', 'fault_severity'], axis=1)

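# Hedged sketch of the util.convert helper assumed by the two Telstra scripts
# in this collection (here and in get_data() further below): pivot a long
# attribute table, one row per (id, attribute) pair, into a wide table with
# one column per attribute value, optionally appending a per-id count column.
# The keyword names (fill, add_count, count_column) follow the calls above;
# the implementation itself is a guess, not the original.
import pandas as pd

def convert(df, fill=None, add_count=False, count_column='count'):
    id_col, attr_col = df.columns[0], df.columns[1]
    wide = pd.crosstab(df[id_col], df[attr_col])
    if fill != 'zero':
        wide = (wide > 0).astype(int)   # presence/absence instead of raw counts
    if add_count:
        wide[count_column] = df.groupby(id_col).size()
    return wide.reset_index()           # keep 'id' as a column; callers set_index('id')
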
parser = argparse.ArgumentParser()
parser.add_argument('config_file', help='Config file', type=str)
parser.add_argument('--repeat', help='Whether or not to repeat the experiment',
                    type=int, default=1)
parser.add_argument('--plot_data', help='Whether or not to plot the data',
                    action='store_true')
parser.add_argument('--cv_config_file', help='Config file after cv',
                    type=str, default="")
parser.add_argument('--n_jobs', help='Number of jobs running in parallel',
                    type=int, default=2)
args = parser.parse_args()

config_file = args.config_file
config = util.convert(json.loads(open(config_file).read()))
cv_config_file = args.cv_config_file
plot_data = args.plot_data
repeat = args.repeat
n_jobs = args.n_jobs

dataset_config = config["dataset"]
classifiers = config["classifiers"]
cross_validation = config["cross_validation"]

use_kfold = True
if "n_folds" in config:
    n_folds = config["n_folds"]
if "test_size" in config:
    use_kfold = False
    test_size = config["test_size"]
cv_n_iter = config["cv_n_iter"]

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split  # used for the split below
import preprocessing
import argparse
import util

args = util.parse_cmdline()
print(args)

if args.precision == 'double':
    type = 'f8'
else:
    type = 'f4'

print("Decompressing the dataset...")
images = util.convert(args.data, args.labels, args.N)
images = preprocessing.filter_classes(images, args.classes, type)
Y = preprocessing.get_binary_labels(images, args.classes)
X = preprocessing.get_data(images, type)

X_tr, X_ts, Y_tr, Y_ts = train_test_split(X, Y, test_size=(1 - args.train),
                                          random_state=4)

mean = X_tr.mean()
std = X_tr.std()
X_tr = (X_tr - mean) / std
X_ts = (X_ts - mean) / std
print(X_tr.mean(), X_tr.std())

    Path(__file__).resolve().parents[2] / "data_concelhos_incidencia.csv")
PATH_TO_CSV_14DIAS = str(
    Path(__file__).resolve().parents[2] / "data_concelhos_14dias.csv")

# Get list of municipalities
concelhos_df = get_list_municipalities()

# Get list of cases
casos_df = get_list_cases_long()
casos_df["confirmados_14"] = (
    casos_df["incidencia"] * casos_df["population"] / 100000.0)
casos_df["confirmados_1"] = casos_df["confirmados_14"].div(14)

cols = ["confirmados_14", "confirmados_1"]
casos_df = convert(casos_df, cols, convert_to_int)
cols = ["incidencia"]
casos_df = convert(casos_df, cols, convert_to_float)
cols = [x for x in casos_df.columns if x.startswith("densidade")]
casos_df = convert(casos_df, cols, convert_to_float)
cols = [x for x in casos_df.columns if x.startswith("population")]
casos_df = convert(casos_df, cols, convert_to_int)

cols = list(casos_df.columns)
for i in ["data", "confirmados_14", "confirmados_1"]:
    cols.remove(i)
cols.insert(cols.index("concelho") + 1, "confirmados_14")
cols.insert(cols.index("concelho") + 2, "confirmados_1")
cols.insert(0, "data")
casos_df = casos_df[cols]

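# Hedged sketch of the convert / convert_to_int / convert_to_float helpers used
# throughout these data-pipeline snippets (assumed, not shown here): apply a
# scalar converter to the listed columns while leaving missing values alone.
import pandas as pd

def convert_to_int(value):
    return int(round(float(value))) if pd.notna(value) else value

def convert_to_float(value):
    return round(float(value), 10) if pd.notna(value) else value

def convert(df, cols, func):
    df[cols] = df[cols].applymap(func)
    return df
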
updated = updated.melt(id_vars=["data"], var_name="Concelho", value_name="Casos")
updated = updated.merge(
    population[["Concelho", "2019"]],
    how="left",
    left_on="Concelho",
    right_on="Concelho",
)
updated.fillna(0, inplace=True)
updated["ratio"] = round(updated["Casos"] * 100 * 1000 / updated["2019"], 1)
updated = updated.pivot_table(values="ratio", index="data", columns="Concelho")
updated = updated.reset_index(level=0)

cols = [x for x in updated.columns if x != "data"]
updated[cols] = updated[cols].diff(2)  # 14 days
updated = updated[2:]

func = convert_to_int if i == DATA_CONCELHOS_14DIAS_CSV else convert_to_float
updated = convert(updated, cols, func)

# sort by date
updated = updated.sort_values("data")
# convert back into dd-mm-yyyy
updated["data"] = updated["data"].dt.strftime("%d-%m-%Y")

updated.to_csv(i, index=False, line_terminator="\n")

transport._preferred_keys = [hostkey.get_name()]
key = transport.get_remote_server_key()
if (key.get_name() != hostkey.get_name() or str(key) != str(hostkey)):
    log.error('Bad host key from server (%s).' % name)
    raise AuthenticationError('Bad host key from server (%s).' % name)
log.info('Server host key verified (%s) for %s' % (key.get_name(), name))

privkey = cipher.decipher(tags['site'].get('privkey', tags['site'].get('pkey', '')))
password = cipher.decipher(tags['site'].get('password', ''))
password_encoding = tags['site'].get('password_encoding', 'utf8')
password = convert(password, password_encoding)

authentified = False
if privkey:
    privkey = util.get_dss_key_from_string(privkey)
    try:
        transport.auth_publickey(tags['site']['login'], privkey)
        authentified = True
    except AuthenticationException:
        log.warning('PKey for %s was not accepted' % name)
if not authentified and password:
    try:
        transport.auth_password(tags['site']['login'], password)
        authentified = True
    except AuthenticationException:

import json
import os

import util

setting_path = os.path.join(os.path.dirname(__file__), 'settings.json')
with open(setting_path) as settingFile:
    settings = util.convert(json.load(settingFile))

extras = ['papertype', 'key', 'extra']
settings['lookup_fields'] = settings['bib_fields'] + extras + ['thing']
settings['fields'] = settings['bib_fields'] + extras

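# Hedged sketch of the util.convert applied to json.load output here and in
# the experiment runner above: the classic "byteify" pattern that recursively
# coerces the unicode keys/values produced under Python 2 into plain str.
# An assumption about the helper, not its actual implementation.
try:                      # Python 2
    text_type = unicode
except NameError:         # Python 3: json already returns str
    text_type = str

def convert(data):
    """Recursively coerce json.load output to plain-str keys and values."""
    if isinstance(data, dict):
        return {convert(k): convert(v) for k, v in data.items()}
    if isinstance(data, list):
        return [convert(v) for v in data]
    if isinstance(data, text_type):
        return str(data)
    return data
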
def set_length(path, objct, dur):
    aud = objct(path)
    aud.info.length = convert(dur)
    aud.save()

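# Hedged sketch of the convert() helper assumed above: turn a 'hh:mm:ss' or
# 'mm:ss' duration string into seconds, which is the unit mutagen uses for
# info.length. The input format is an assumption.
def convert(duration):
    seconds = 0.0
    for part in str(duration).split(':'):
        seconds = seconds * 60 + float(part)
    return seconds

# convert('3:25') -> 205.0
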
    MathJax.Hub.Config({
        tex2jax: {
            inlineMath: [ ['$','$'], ],
            processEscapes: true
        }
    });
    </script>
    {%renderer%}
    </footer>
</body>
</html>
'''

f = open(os.path.join(filepath, "q2_particle_in_an_infinite_potential_box.md"), "r")
text_list = util.convert(f.read(), lateximg=True, addbutton=True, addtoc=True)
mds = []
print("Total {} blocks of Markdown".format(len(text_list)))
for t in text_list:
    tmp = dcc.Markdown(t, dangerously_allow_html=True)
    mds.append(tmp)

#####################################################################


def get_1dbox(n=5, L=10, num_me=1, all_levels=False):
    # Define the wavefunction
    def psi(x, n, L):
        return np.sqrt(2.0 / L) * np.sin(float(n) * np.pi * x / L)

    def get_energy(n, L, m):

    'arslvt': 3,
    'arsalentejo': 4,
    'arsalgarve': 5,
    'madeira': 6,
    'açores': 7,
    'outro': 8,
    'all': 9,
}

cols = sorted(cols, key=lambda x: ARS_ORDER[x.split('_')[-1]])
data_regional = data_regional[cols]

# concatenate everything into one wide table
data_wide = pd.concat([data_general, data_ages, data_regional], axis=1)

# clean the data: integers, and floats rounded to 10 decimal places to avoid
# cross-platform inconsistencies such as 0.3(3)
cols = [
    x for x in data_wide.columns
    if not x.startswith("data") and 'perc' not in x
]
data_wide = convert(data_wide, cols, convert_to_int)
cols = [x for x in data_wide.columns if 'perc' in x]
data_wide[cols] = data_wide[cols].apply(lambda x: round(x, 10))

# recalculate the date column, just in case
data_wide['data'] = data_wide.index
data_wide['data'] = data_wide['data'].apply(
    lambda x: x.strftime('%d-%m-%Y'))

data_wide.to_csv(PATH_TO_ROOT / 'vacinas_detalhe.csv', index=False)

def from_scryfall(cls, data):
    mapping = {'released_at': 'release_date'}
    data = util.convert(data, mapping)
    data['release_date'] = date.fromisoformat(data['release_date'])
    col_names = [c.name for c in Set.__table__.columns]
    return Set(**util.restriction(data, col_names))

def garages():
    refresh()
    return convert(many(Garage))

casos_df = get_list_cases_long()

# Merge list of cases with list of municipalities
casos_df = concelhos_df.merge(casos_df, how="left", on="concelho")

# Helper for pivot table
casos_df.loc[casos_df.data.isna(), ["confirmados"]] = -1
casos_df.loc[casos_df.data.isna(), ["data"]] = "24-03-2020"
casos_df = casos_df.sort_values(by=["concelho"])

# Convert long table to wide table
casos_wide = pd.pivot_table(casos_df, values="confirmados",
                            index="data", columns="concelho")
casos_wide = casos_wide.reset_index()
casos_wide.data = pd.to_datetime(casos_wide.data, format="%d-%m-%Y")
casos_wide = casos_wide.sort_values(by="data").reset_index(drop=True)
casos_wide = casos_wide.replace(-1, np.nan)

casos_wide = patch_concelhos1(casos_wide)
casos_wide.data = casos_wide["data"].dt.strftime("%d-%m-%Y")
casos_wide = patch_concelhos2(casos_wide)

cols = [x for x in casos_wide.columns if not x.startswith("data")]
casos_wide = convert(casos_wide, cols, convert_to_int)

casos_wide.to_csv(PATH_TO_CSV, index=False, sep=",")

def train_conv_net(datasets, U, idx_word_map, img_w=300, filter_hs=[3, 4, 5],
                   hidden_units=[100, 2], dropout_rate=[0.5], shuffle_batch=True,
                   n_epochs=25, batch_size=50, lr_decay=0.95,
                   conv_non_linear="relu", activations=[Iden], sqr_norm_lim=9,
                   non_static=True, sen_dropout_rate=[0.0], whether_train_sen=True):
    rng = np.random.RandomState(3435)
    img_h = datasets[0][0][0].shape[0] - 1
    filter_w = img_w
    feature_maps = hidden_units[0]
    filter_shapes = []
    pool_sizes = []
    for filter_h in filter_hs:
        filter_shapes.append((feature_maps, 1, filter_h, filter_w))
        pool_sizes.append((img_h - filter_h + 1, img_w - filter_w + 1))
    parameters = [("image shape", img_h, img_w),
                  ("filter shape", filter_shapes),
                  ("hidden_units", hidden_units),
                  ("dropout", dropout_rate),
                  ("batch_size", batch_size),
                  ("non_static", non_static),
                  ("learn_decay", lr_decay),
                  ("conv_non_linear", conv_non_linear),
                  ("non_static", non_static),
                  ("sqr_norm_lim", sqr_norm_lim),
                  ("shuffle_batch", shuffle_batch),
                  ('sentence dropout rate', sen_dropout_rate)]
    print(parameters)

    # define model architecture
    index = T.lscalar()
    x = T.tensor3('x')
    y = T.ivector('y')
    sen_x = T.matrix('sen_x')
    mark = T.matrix('mark')
    sen_y = T.ivector('sen_y')
    Words = theano.shared(value=U, name="Words")
    zero_vec_tensor = T.vector()
    zero_vec = np.zeros(img_w)
    set_zero = theano.function(
        [zero_vec_tensor],
        updates=[(Words, T.set_subtensor(Words[0, :], zero_vec_tensor))],
        allow_input_downcast=True)
    layer0_input = Words[T.cast(x.flatten(), dtype="int32")].reshape(
        (x.shape[0] * x.shape[1], 1, x.shape[2], Words.shape[1]))
    sen_layer0_input = Words[T.cast(sen_x.flatten(), dtype='int32')].reshape(
        (sen_x.shape[0], 1, sen_x.shape[1], Words.shape[1]))
    conv_layers = []
    layer1_inputs = []
    Doc_length = datasets[0][0].shape[0]
    sen_layer1_inputs = []
    for i in xrange(len(filter_hs)):
        filter_shape = filter_shapes[i]
        pool_size = pool_sizes[i]
        conv_layer = LeNetConvPoolLayer(rng, input=layer0_input,
                                        image_shape=(None, 1, img_h, img_w),
                                        filter_shape=filter_shape,
                                        poolsize=pool_size,
                                        non_linear=conv_non_linear)
        layer1_input = conv_layer.output.flatten(2)
        conv_layers.append(conv_layer)
        layer1_inputs.append(layer1_input)
        sen_layer1_input = conv_layer.predict(sen_layer0_input, None).flatten(2)
        sen_layer1_inputs.append(sen_layer1_input)
    layer1_input = T.concatenate(layer1_inputs, 1)
    sen_layer1_input = T.concatenate(sen_layer1_inputs, 1)
    hidden_units[0] = feature_maps * len(filter_hs)
    sen_hidden_units = [feature_maps * len(filter_hs), 3]
    shaped_mark = T.flatten(mark)
    sen_classifier1 = MLPDropout(rng, input=sen_layer1_input,
                                 layer_sizes=sen_hidden_units,
                                 activations=activations,
                                 dropout_rates=sen_dropout_rate)
    sen_cost = sen_classifier1.dropout_negative_log_likelihood(sen_y)
    sen_pos_prob = T.max(
        sen_classifier1.predict_p(layer1_input)[:, np.array([0, 2])], axis=1)
    prev_layer1_output, updates = theano.scan(
        fn=lambda i, x: x[i * Doc_length:i * Doc_length + Doc_length],
        sequences=[T.arange(batch_size)],
        non_sequences=layer1_input * (sen_pos_prob.dimshuffle(0, 'x'))
                      * (shaped_mark.dimshuffle(0, 'x')))
    layer1_output = T.sum(prev_layer1_output, axis=1)
    classifier = MLPDropout(rng, input=layer1_output,
                            layer_sizes=hidden_units,
                            activations=activations,
                            dropout_rates=dropout_rate)

    # define parameters of the model and update functions using adadelta
    params = classifier.params
    for conv_layer in conv_layers:
        params += conv_layer.params
    if non_static:
        params += [Words]
    # add sentence-level parameters
    sen_params = sen_classifier1.params
    for conv_layer in conv_layers:
        sen_params += conv_layer.params
    if non_static:
        sen_params += [Words]
    cost = classifier.negative_log_likelihood(y)
    dropout_cost = classifier.dropout_negative_log_likelihood(y)
    grad_updates = sgd_updates_adadelta(params, dropout_cost, lr_decay,
                                        1e-6, sqr_norm_lim)
    sen_grad_updates = sgd_updates_adadelta(sen_params, sen_cost, lr_decay,
                                            1e-6, sqr_norm_lim)

    np.random.seed(3435)
    # doc length * number of documents
    train_mask = np.zeros((datasets[0].shape[0], datasets[0].shape[1]),
                          dtype='float32')
    test_mask = np.zeros((datasets[2].shape[0], datasets[2].shape[1]),
                         dtype='float32')
    # set the mask
    for i in range(datasets[0].shape[0]):
        for j in range(datasets[0][i].shape[0]):
            if np.count_nonzero(datasets[0][i][j]) != 0:
                train_mask[i][j] = 1.0
    for i in range(datasets[2].shape[0]):
        for j in range(datasets[2][i].shape[0]):
            if np.count_nonzero(datasets[2][i][j]) != 0:
                test_mask[i][j] = 1.0
    if datasets[0].shape[0] % batch_size > 0:
        extra_data_num = batch_size - datasets[0].shape[0] % batch_size
        permuted_index = np.random.permutation(range(datasets[0].shape[0]))
        permuted_index = np.append(permuted_index, permuted_index[:extra_data_num])
        new_data = datasets[0][permuted_index]
    else:
        permuted_index = np.random.permutation(range(datasets[0].shape[0]))
        new_data = datasets[0][permuted_index]
    n_batches = new_data.shape[0] / batch_size
    n_train_batches = int(np.round(n_batches * 0.9))
    # divide the train set into train/val sets
    train_set_y = datasets[1][permuted_index]
    test_set_x, test_set_y = shared_dataset((datasets[2][:, :, :-1], datasets[3]))
    test_set_mark = theano.shared(test_mask.astype(theano.config.floatX))
    train_mask = train_mask[permuted_index]
    train_set_mark = train_mask[:n_train_batches * batch_size]
    train_set_mark = theano.shared(train_set_mark.astype(theano.config.floatX))
    train_set_with_sen_label = new_data[:n_train_batches * batch_size]
    val_set_with_sen_label = new_data[n_train_batches * batch_size:]
    train_set = new_data[:n_train_batches * batch_size, :, :-1]
    train_set_label = train_set_y[:n_train_batches * batch_size]
    val_set = new_data[n_train_batches * batch_size:, :, :-1]
    val_set_label = train_set_y[n_train_batches * batch_size:]
    val_set_mark = train_mask[n_train_batches * batch_size:]
    val_set_mark = theano.shared(val_set_mark.astype(theano.config.floatX))
    train_set_x, train_set_y = shared_dataset((train_set, train_set_label))
    val_set_x, val_set_y = shared_dataset((val_set, val_set_label))
    n_val_batches = n_batches - n_train_batches
    val_model = theano.function(
        [index], classifier.errors(y),
        givens={
            x: val_set_x[index * batch_size:(index + 1) * batch_size],
            y: val_set_y[index * batch_size:(index + 1) * batch_size],
            mark: val_set_mark[index * batch_size:(index + 1) * batch_size]
        },
        allow_input_downcast=True)
    # compile theano functions to get train/val/test errors
    test_model = theano.function(
        [index], classifier.errors(y),
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size],
            mark: train_set_mark[index * batch_size:(index + 1) * batch_size]
        },
        allow_input_downcast=True)
    train_model = theano.function(
        [index], cost, updates=grad_updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size],
            mark: train_set_mark[index * batch_size:(index + 1) * batch_size]
        },
        allow_input_downcast=True)
    test_pred_layers = []
    test_size = datasets[2].shape[0]
    test_batch_size = 1
    n_test_batches = int(math.ceil(test_size / float(test_batch_size)))
    test_layer0_input = Words[T.cast(x.flatten(), dtype="int32")].reshape(
        (x.shape[0] * x.shape[1], 1, x.shape[2], Words.shape[1]))
    for conv_layer in conv_layers:
        test_layer0_output = conv_layer.predict(test_layer0_input,
                                                test_batch_size * Doc_length)
        test_pred_layers.append(test_layer0_output.flatten(2))
    test_layer1_input = T.concatenate(test_pred_layers, 1)
    test_sen_prob = T.max(
        sen_classifier1.predict_p(test_layer1_input)[:, np.array([0, 2])], axis=1)
    test_sen_prob_to_sen, updates = theano.scan(
        fn=lambda i, x: x[i * Doc_length:i * Doc_length + Doc_length],
        sequences=[T.arange(test_batch_size)],
        non_sequences=test_sen_prob)
    sorted_index = T.argsort(test_sen_prob_to_sen * shaped_mark, axis=-1)[:, -5:]
    sorted_sentence, updates = theano.scan(
        fn=lambda i, y: y[i, sorted_index[i], :],
        sequences=[T.arange(sorted_index.shape[0])],
        non_sequences=x)
    sorted_prob, updates = theano.scan(
        fn=lambda i, z: z[i, sorted_index[i]],
        sequences=[T.arange(sorted_index.shape[0])],
        non_sequences=test_sen_prob_to_sen)
    sorted_sentence_value = theano.function(
        [index], sorted_sentence, allow_input_downcast=True,
        givens={
            x: test_set_x[index * test_batch_size:(index + 1) * test_batch_size],
            mark: test_set_mark[index * test_batch_size:(index + 1) * test_batch_size]
        })
    sorted_prob_val = theano.function(
        [index], sorted_prob, allow_input_downcast=True,
        givens={
            x: test_set_x[index * test_batch_size:(index + 1) * test_batch_size],
            mark: test_set_mark[index * test_batch_size:(index + 1) * test_batch_size]
        })
    test_layer1_output, updates = theano.scan(
        fn=lambda i, x: x[i * Doc_length:i * Doc_length + Doc_length],
        sequences=[T.arange(test_batch_size)],
        non_sequences=test_layer1_input * (test_sen_prob.dimshuffle(0, 'x'))
                      * (shaped_mark.dimshuffle(0, 'x')))
    test_layer1_output = T.sum(test_layer1_output, axis=1)
    test_y_pred = classifier.predict(test_layer1_output)
    test_error = T.mean(T.neq(test_y_pred, y))
    test_model_all = theano.function(
        [index], test_error, allow_input_downcast=True,
        givens={
            x: test_set_x[index * test_batch_size:(index + 1) * test_batch_size],
            y: test_set_y[index * test_batch_size:(index + 1) * test_batch_size],
            mark: test_set_mark[index * test_batch_size:(index + 1) * test_batch_size],
        })

    print('... training')
    epoch = 0
    best_val_perf = 0
    val_perf = 0
    test_perf = 0
    cost_epoch = 0
    sen_batch_size = 50
    best_sen_param = []
    for p in sen_params:
        best_sen_param.append(theano.shared(p.get_value()))
    # first train on sentences
    best_sen_val = 0.0
    if whether_train_sen == True:
        print('pre-train on sentences')
        while epoch < 20:
            sen_costs = []
            train_sen = train_set_with_sen_label
            train_sentences = util.doc_to_sen(train_sen)
            train_sentences = util.remove(train_sentences)
            train_sentences = util.downsample_three(train_sentences)
            print("positive sentences after sampling: "
                  + str(np.sum(train_sentences[:, -1] == 2)))
            print("negative sentences after sampling: "
                  + str(np.sum(train_sentences[:, -1] == 0)))
            print("neutral sentences after sampling: "
                  + str(np.sum(train_sentences[:, -1] == 1)))
            train_sentences = np.random.permutation(train_sentences)
            if train_sentences.shape[0] % sen_batch_size != 0:
                extra_data_num = sen_batch_size - train_sentences.shape[0] % sen_batch_size
                extra_index = np.random.permutation(
                    range(train_sentences.shape[0]))[:extra_data_num]
                train_sentences = np.vstack((train_sentences, train_sentences[extra_index]))
            train_sen_x, train_sen_y = shared_dataset(
                (train_sentences[:, :-1], train_sentences[:, -1]))
            train_sen_model = theano.function(
                [index], sen_cost, updates=sen_grad_updates,
                givens={
                    sen_x: train_sen_x[index * sen_batch_size:(index + 1) * sen_batch_size],
                    sen_y: train_sen_y[index * sen_batch_size:(index + 1) * sen_batch_size]
                })
            n_train_sen_batches = train_sentences.shape[0] / sen_batch_size
            for minibatch_index_1 in np.random.permutation(range(n_train_sen_batches)):
                cur_sen_cost = train_sen_model(minibatch_index_1)
                sen_costs.append(cur_sen_cost)
                set_zero(zero_vec)
            print("training sentence cost: " + str(sum(sen_costs) / len(sen_costs)))
            val_sen = val_set_with_sen_label
            val_sentences = util.doc_to_sen(val_sen)
            val_sentences = util.remove(val_sentences)
            print("positive sentences in the validation set: "
                  + str(np.sum(val_sentences[:, -1] == 2)))
            print("negative sentences in the validation set: "
                  + str(np.sum(val_sentences[:, -1] == 0)))
            print("neutral sentences in the validation set: "
                  + str(np.sum(val_sentences[:, -1] == 1)))
            val_sen_x, val_sen_y = shared_dataset(
                (val_sentences[:, :-1], val_sentences[:, -1]))
            val_sen_model = theano.function(
                [], sen_classifier1.errors(sen_y),
                givens={sen_x: val_sen_x, sen_y: val_sen_y})
            val_accuracy = 1 - val_sen_model()
            print("validation sentence accuracy: " + str(val_accuracy))
            if val_accuracy > best_sen_val:
                best_sen_val = val_accuracy
                for i, p in enumerate(best_sen_param):
                    p.set_value(sen_params[i].get_value())
            epoch = epoch + 1
        for i, sp in enumerate(sen_params):
            sp.set_value(best_sen_param[i].get_value())

    # train on documents
    epoch = 0
    while epoch < n_epochs:
        start_time = time.time()
        epoch = epoch + 1
        if shuffle_batch:
            for minibatch_index in np.random.permutation(range(n_train_batches)):
                cost_epoch = train_model(minibatch_index)
                set_zero(zero_vec)
        else:
            for minibatch_index in xrange(n_train_batches):
                cost_epoch = train_model(minibatch_index)
                set_zero(zero_vec)
        train_losses = [test_model(i) for i in xrange(n_train_batches)]
        train_perf = 1 - np.mean(train_losses)
        val_losses = [val_model(i) for i in xrange(n_val_batches)]
        val_perf = 1 - np.mean(val_losses)
        print('epoch: %i, training time: %.2f secs, train perf: %.2f %%, val perf: %.2f %%'
              % (epoch, time.time() - start_time, train_perf * 100., val_perf * 100.))
        if val_perf >= best_val_perf:
            best_val_perf = val_perf
            test_loss = [test_model_all(i) for i in xrange(n_test_batches)]
            test_perf = 1 - np.sum(test_loss) / float(test_size)
            print("best test performance so far: " + str(test_perf))

    test_loss = [test_model_all(i) for i in xrange(n_test_batches)]
    new_test_loss = []
    for i in test_loss:
        new_test_loss.append(np.asscalar(i))
    test_loss = new_test_loss
    correct_index = np.where(np.array(test_loss) == 0)[0]
    count_pos = 0
    test_labels = np.array(datasets[3])
    # sample two correctly predicted positive documents and two correctly
    # predicted negative documents; for each document, generate the top five
    # rationales with the highest probabilities
    print("negative estimated rationales: ")
    print(len(idx_word_map))
    for c in correct_index:
        if test_labels[c] == 1:
            continue
        print(util.convert(sorted_sentence_value(c)[0], idx_word_map))
        print(sorted_prob_val(c))
        count_pos += 1
        if count_pos == 2:
            break
    count_neg = 0
    print("positive estimated rationales: ")
    for c in correct_index:
        if test_labels[c] == 0:
            continue
        print(util.convert(sorted_sentence_value(c)[0], idx_word_map))
        print(sorted_prob_val(c))
        count_neg += 1
        if count_neg == 2:
            break
    return test_perf

key = util.urlencode(key, cfg[4])
key = key.replace('%5C', '%')
print(key)

scfg = []
n, t = lg.getsearchparam(key)
for i in range(n):
    scfg.append(t[i + 1])
    print(t[i + 1])

# navigate
res = request.urlopen(cfg[5])
print(res.status, res.reason)
s = res.read()
s = util.convert(s, cfg[1])
with open(file, 'wb') as f:
    f.write(s)

# gethomecomics
comics = []
n, t = lg.gethomecomics(file)
for i in range(n):
    comics.append(t[i + 1])
print(comics[0])

# cats
cats = []
comics = []
n, t = lg.getcats(file)
for i in range(n):
    cats.append(t[i + 1])

def details(reservation: Reservation):
    spot = one(Spot, reservation.spot_id)
    garage_dict = convert(one(Garage, spot.garage_id))
    garage_dict['spot'] = convert(spot)
    garage_dict.pop('spots')
    return garage_dict

def tick(self):
    # Read from the camera.
    self.camera_frame = self.camera.get()
    if self.live_segment:
        if self.live_segment_ready:
            self.live_segment_ready = False
            self.model_process.submit(COMMAND_SEGMENT,
                                      (EVENT_CAMERA_SEGMENT, self.camera_frame))
    else:
        self.camera_segmented = None

    # Display the current camera frame.
    camera_combined = self.camera_frame
    if self.camera_segmented is not None:
        camera_combined = self.camera_frame // 3 + self.camera_segmented
    util.display_image(CAM_WINDOW, camera_combined)

    # Capture a pressed key.
    self.key = cv2.waitKey(1) & 0xff

    # Toggle live segmenting if the live segmenting key is pressed.
    if self.key_pressed(KEY_LIVE):
        self.live_segment = not self.live_segment

    # Capture a frame if the capture key is pressed.
    if self.key_pressed(KEY_CAPTURE):
        self.capture(self.camera_frame)

    # Open a file if the open key is pressed.
    if self.key_pressed(KEY_OPEN):
        path = input('path> ')
        try:
            image = cv2.imread(path)
            image = util.convert(image)
            image = cv2.resize(image, (CAM_WIDTH, CAM_HEIGHT))
            self.capture(image)
            print('Image loaded')
        except:
            print('Invalid path')

    # Export the segmented image if the export key is pressed.
    if self.key_pressed(KEY_EXPORT):
        if self.canvas is not None:
            path = input('path> ')
            try:
                cv2.imwrite(path, util.convert(self.canvas.get_combined()))
                print('Image saved')
            except:
                print('Invalid path')

    # Fill the canvas if the fill key is pressed.
    if self.key_pressed(KEY_FILL):
        if self.canvas is not None:
            self.canvas.fill()

    # Process the segment map if the process key is pressed.
    if self.key_pressed(KEY_PROCESS):
        if self.canvas is not None:
            self.process(self.canvas.get_map())

    # Save the result if the save key is pressed.
    if self.key_pressed(KEY_SAVE):
        if self.im_processed is not None:
            path = input('path> ')
            try:
                cv2.imwrite(path, util.convert(self.im_processed))
                print('Image saved')
            except:
                print('Invalid path')

    # Segment the result if the test key is pressed.
    if self.key_pressed(KEY_TEST):
        if self.im_processed is not None:
            self.capture(self.im_processed)

    # Quit if the quit key is pressed.
    if self.key_pressed(KEY_QUIT):
        self.camera.stop()
        self.model_process.stop()
        return False

    # Tick the model process.
    self.model_process.tick()
    return True

def callback(ch, method, properties, body):
    id_, datum_ = convert(body)  # id_ not used
    score = analyze_jubatus(datum_)
    print score

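# Hedged sketch of the convert() helper shared by these two queue callbacks:
# the message body is assumed (hypothetically) to be a JSON object carrying
# 'id' and 'datum' fields; the helper returns them as an (id, datum) pair.
# The message layout is an assumption, not taken from the source.
import json

def convert(body):
    message = json.loads(body)
    return message['id'], message['datum']
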
def get_data():
    path = "data/"
    train = pd.read_csv(path + "train.csv")
    test = pd.read_csv(path + "test.csv")

    # convert attributes to wide tables
    event_type = util.convert(pd.read_csv(path + "event_type.csv"),
                              add_count=True, count_column="event_type_count")
    # usecols - to control which columns to be parsed
    log_feature = util.convert(
        pd.read_csv(path + "log_feature.csv"),
        fill="zero", add_count=True, count_column="log_feature_count"
    )
    resource_type = util.convert(
        pd.read_csv(path + "resource_type.csv"),
        add_count=True, count_column="resource_type_count"
    )
    severity_type = util.convert(pd.read_csv(path + "severity_type.csv"))
    location = util.convert(pd.read_csv(path + "location.csv"),
                            add_count=True, count_column="location_count")

    # move id to the index, for merge purpose
    event_type.set_index("id", inplace=True)
    log_feature.set_index("id", inplace=True)
    resource_type.set_index("id", inplace=True)
    severity_type.set_index("id", inplace=True)
    location.set_index("id", inplace=True)
    train.set_index("id", inplace=True)
    test.set_index("id", inplace=True)

    temp = train.drop(["location", "fault_severity"], axis=1)

    # merge with training dataset
    id_event_type = pd.merge(temp, event_type, left_index=True, right_index=True, how="inner")
    id_log_feature = pd.merge(temp, log_feature, left_index=True, right_index=True, how="inner")
    id_resource_type = pd.merge(temp, resource_type, left_index=True, right_index=True, how="inner")
    id_severity_type = pd.merge(temp, severity_type, left_index=True, right_index=True, how="inner")
    id_location = pd.merge(temp, location, left_index=True, right_index=True, how="inner")
    train = train.drop("location", axis=1)
    df_train = pd.concat(
        [train, id_event_type, id_log_feature, id_resource_type, id_severity_type, id_location],
        axis=1
    )

    # do the same for the test dataset
    temp = test.drop("location", axis=1)
    t_id_event_type = pd.merge(temp, event_type, left_index=True, right_index=True, how="inner")
    t_id_log_feature = pd.merge(temp, log_feature, left_index=True, right_index=True, how="inner")
    t_id_resource_type = pd.merge(temp, resource_type, left_index=True, right_index=True, how="inner")
    t_id_severity_type = pd.merge(temp, severity_type, left_index=True, right_index=True, how="inner")
    t_id_location = pd.merge(temp, location, left_index=True, right_index=True, how="inner")
    df_test = pd.concat(
        [temp, t_id_event_type, t_id_log_feature, t_id_resource_type, t_id_severity_type, t_id_location],
        axis=1
    )

    df_train = df_train.reset_index()
    df_test = df_test.reset_index()

    X_train, y_train = df_train.ix[:, 2:], df_train.ix[:, 1]
    X_test = df_test.ix[:, 1:]
    X_ids = df_test.ix[:, 0]

    output = {}
    output["X_train"] = X_train
    output["y_train"] = y_train
    output["X_test"] = X_test
    output["X_ids"] = X_ids
    return output

    critic,
    t.optim.Adam,
    nn.MSELoss(reduction='sum'),
    actor_learning_rate=1e-5,
    critic_learning_rate=1e-4)

episode, step, reward_fulfilled = 0, 0, 0
smoothed_total_reward = 0

while episode < max_episodes:
    episode += 1
    total_reward = 0
    terminal = False
    step = 0
    hidden = t.zeros([1, 1, 256])
    state = convert(env.reset())
    tmp_observations = []

    while not terminal:
        step += 1
        with t.no_grad():
            old_state = state
            # agent model inference
            old_hidden = hidden
            action, _, _, hidden = rppo.act({
                "mem": state,
                "hidden": hidden
            })
            state, reward, terminal, _ = env.step(action.item())
            state = convert(state)
            total_reward += reward

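# Hedged sketch of the convert() helper assumed by the training loop above:
# wrap a gym observation (a numpy array) into a float tensor with a leading
# batch dimension so it can be fed to the recurrent agent. A guess, not the
# project's actual helper.
import torch as t

def convert(observation):
    return t.tensor(observation, dtype=t.float32).unsqueeze(0)
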
    if not entries:
        return None
    if len(entries) == 1:
        entries[0]['extra'] = None
        return [entries[0]]
    return entries


if __name__ == '__main__':
    con = Controller()
    keys = ['wanght2008', 'guptat2007', 'lencckn2010', 'guptat2007-1',
            'guptat2007-11']
    for key in keys:
        entries = con.get(key)
        if not entries:
            print 'WARNING: no entries for %s--------------------' % key
            continue
        elif len(entries) > 1:
            print 'WARNING: multiple entries for %s--------------' % key
        else:
            print '-------------------------------------------------------'
        for x in entries:
            print util.convert(bib_string(x))