def encode(doc_chunks, chunks_mask, model, batch_size=32):
    embeddings, doc_ids, par_ids = [], [], []
    chunks, masks = doc_chunks, chunks_mask
    if isinstance(doc_chunks, dict) and isinstance(chunks_mask, dict):
        chunks, masks = [], []
        for doc_id, doc in tqdm(doc_chunks.items(), desc='Generating chunks embeddings...'):
            doc_ids.append(doc_id)
            if isinstance(doc, list):
                chunks.append(util.flatten_list(doc))
                masks.append(util.flatten_list(chunks_mask[doc_id]))
            elif isinstance(doc, dict):
                for par_id, par in doc.items():
                    par_ids.append(par_id)
                    if isinstance(par, list):
                        chunks.append(util.flatten_list(par))
                        masks.append(util.flatten_list(chunks_mask[doc_id][par_id]))
                    else:
                        chunks.append(torch.flatten(par))
                        masks.append(torch.flatten(chunks_mask[doc_id][par_id]))
            else:
                chunks.append(torch.flatten(doc))
                masks.append(torch.flatten(chunks_mask[doc_id]))
    # elif isinstance(doc_chunks, list) and isinstance(masks, list):
    chunks = torch.stack(chunks, axis=0)
    masks = torch.stack(masks, axis=0)
    embeddings = encode_chunks(chunks, masks, model, batch_size)
    # print(f"Shape of embeddings {embeddings.shape}")
    return embeddings
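# The snippets in this collection all rely on a flatten_list helper that is not
# defined here. Below is a minimal sketch of the behaviour most of them assume:
# recursively flatten nested lists while leaving non-list items (e.g. strings,
# dicts, tuples) intact. Each project's util.flatten_list may differ; some uses
# (e.g. flattening the output of zip) imply a version that also flattens tuples.
def flatten_list(nested):
    flat = []
    for item in nested:
        if isinstance(item, list):
            flat.extend(flatten_list(item))
        else:
            flat.append(item)
    return flat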
def handle_for(block, instance_vars, stack):
    validate_for_loop_syntax(block)
    tokens = clean_up_list_elems(flatten_list(re.split("for\s*\(", block)))
    tokens = clean_up_list_elems(flatten_list(list(map(lambda x: re.split("\)\s*\{", x), tokens))))
    tokens = clean_up_list_elems(flatten_list(list(map(lambda x: re.split("\}", x), tokens))))
    tokens = tokens[0].split(";") + [tokens[1]]
    initialize, condition, update, statements = tokens
    assign_variable(initialize, instance_vars, stack)
    """
    evaluated_condition = evaluate_expression(condition, instance_vars, stack)
    if type(evaluated_condition) is not bool:
        raise InvalidForLoopException("Boolean condition is of wrong type")
    """
    while evaluate_expression(condition, instance_vars, stack):
        parse_eval(statements, instance_vars, stack)
        assign_variable(update, instance_vars, stack)
    # Assumes well-formed expression.
    var_name = initialize.split(" ")[1]
    get_variable_frame(var_name, instance_vars, stack).pop(var_name)
    return
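# Illustration of the tokenization above on a toy block (assuming
# clean_up_list_elems drops empty strings and trims surrounding whitespace;
# the block text itself is hypothetical):
#   block = "for (int i = 0; i < 3; i = i + 1) { print(i); }"
# After the three split passes:
#   tokens == ["int i = 0; i < 3; i = i + 1", "print(i);"]
# After tokens[0].split(";") + [tokens[1]]:
#   initialize == "int i = 0", condition == " i < 3",
#   update == " i = i + 1",  statements == "print(i);"
# and var_name == "i", which is popped from its frame when the loop ends.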
def handle_conditional_statements(if_else_block, instance_vars, stack):
    has_else_clause = verify_if_else_syntax(if_else_block)

    # Tokenize if-else block: split conditions from statements
    tokens = clean_up_list_elems(flatten_list(re.split("\s*\}\s*else if\s*\(", if_else_block)))  # Split by } else if (
    tokens = clean_up_list_elems(flatten_list(list(map(lambda elem: re.split("\s*if\s*\(", elem), tokens))))  # Split by if (
    tokens = clean_up_list_elems(flatten_list(list(map(lambda elem: re.split("\s*\}\s*else\s*\{", elem), tokens))))  # Split by } else {, insert True as condition
    if has_else_clause:
        tokens.insert(-1, "True")
    tokens = clean_up_list_elems(flatten_list(list(map(lambda elem: re.split("\s*\)\s*\{", elem), tokens))))  # Split by ) {
    tokens = clean_up_list_elems(flatten_list(list(map(lambda elem: re.split("\}", elem), tokens))))  # Split by }

    # Split tokens into list of conditions and statements
    conditions_list, statements_list = [], []
    for index in range(0, len(tokens)):
        if index % 2 == 0:  # Even, is a condition.
            conditions_list.append(tokens[index])
        else:
            statements_list.append(tokens[index])
    assert len(conditions_list) == len(statements_list), "conditions_list and statements_list are of different lengths"

    # Check the conditions; if True, return the associated statements
    for index in range(0, len(conditions_list)):
        curr_condition = evaluate_expression(conditions_list[index], instance_vars, stack)
        if type(curr_condition) is not bool:
            raise InvalidIfElseBlockException("Condition parsed was not a boolean expression. Condition was: " + str(curr_condition))
        if curr_condition:
            return statements_list[index]
    return None
def dataset_for_WF_multifile(spike, y, N):
    if type(spike) == np.ndarray:
        spike = [spike]
    if type(y) == np.ndarray:
        y = [y]
    spike_wiener = []
    emg_wiener = []
    for i in range(len(spike)):
        spike_temp, emg_temp = dataset_for_WF(spike[i], y[i], N)
        spike_wiener.append(spike_temp)
        emg_wiener.append(emg_temp)
    return flatten_list(spike_wiener), flatten_list(emg_wiener)
def handle_conditional_statements(if_else_block, instance_vars, stack):
    # import javarepl  # import evaluate_expression
    has_else_clause = verify_if_else_syntax(if_else_block)

    # Tokenize if-else block: split conditions from statements
    tokens = clean_up_list_elems(
        flatten_list(re.split("\s*\}\s*else if\s*\(", if_else_block)))  # Split by } else if (
    tokens = clean_up_list_elems(
        flatten_list(
            list(map(lambda elem: re.split("\s*if\s*\(", elem), tokens))))  # Split by if (
    tokens = clean_up_list_elems(
        flatten_list(
            list(map(lambda elem: re.split("\s*\}\s*else\s*\{", elem), tokens))))  # Split by } else {, insert True as condition
    if has_else_clause:
        tokens.insert(-1, "True")
    tokens = clean_up_list_elems(
        flatten_list(
            list(map(lambda elem: re.split("\s*\)\s*\{", elem), tokens))))  # Split by ) {
    tokens = clean_up_list_elems(
        flatten_list(list(map(lambda elem: re.split("\}", elem), tokens))))  # Split by }

    # Split tokens into list of conditions and statements
    conditions_list, statements_list = [], []
    for index in range(0, len(tokens)):
        if index % 2 == 0:  # Even, is a condition.
            conditions_list.append(tokens[index])
        else:
            statements_list.append(tokens[index])
    assert len(conditions_list) == len(
        statements_list
    ), "conditions_list and statements_list are of different lengths"

    # Check the conditions; if True, return the associated statements
    for index in range(0, len(conditions_list)):
        curr_condition = evaluate_expression(conditions_list[index], instance_vars, stack)
        if type(curr_condition) is not bool:
            raise InvalidIfElseBlockException(
                "Condition parsed was not a boolean expression. Condition was: "
                + str(curr_condition))
        if curr_condition:
            return statements_list[index]
    return None
def handle_while(block, instance_vars, stack):
    validate_while_loop_syntax(block)
    tokens = clean_up_list_elems(flatten_list(re.split("while\s*\(", block)))
    tokens = clean_up_list_elems(flatten_list(list(map(lambda x: re.split("\)\s*\{", x), tokens))))
    tokens = clean_up_list_elems(flatten_list(list(map(lambda x: re.split("\}", x), tokens))))
    assert len(tokens) == 2, "There should be 2 tokens, but there are " + str(len(tokens))
    condition, statements = tokens[0], tokens[1]
    while evaluate_expression(condition, instance_vars, stack):
        parse_eval(statements, instance_vars, stack)  # We will NOT support different scoping for variables inside.
    return  # Call parse_eval with instance_vars
def print_last_thts(bm_fname, optz_cfg=None):
    # load bm_fname
    bm = importlib.import_module(bm_fname.rsplit('.', 1)[0])
    e = bm.e; decorate_stind(e)
    compare = bm.compare
    if optz_cfg is None:
        optz_cfg = bm.optz_cfg

    # optz_detail
    optz_detail = get_optz_detail(bm_fname, optz_cfg)
    print('\n===== inferred thts: %s =====' % optz_detail)

    # load res's from files
    thts_l = []
    alg_str_l = []
    for alg_str in compare:
        thts = load_res(optz_detail, alg_str)[-1][1]
        thts_l += [thts]
        alg_str_l += [alg_str]

    # print
    print('\t%s' % ('\t\t'.join(alg_str_l)))
    for i in range(len(thts_l[0]) // 2):
        thts_i_float = util.flatten_list([[thts[2*i], util.softplus(thts[2*i+1])] for thts in thts_l])
        thts_i_str = ['%.3f' % v for v in thts_i_float]
        print('tht_%d(mean)\t%s' % (i+1, '\t'.join(thts_i_str[0::2])))
        print('tht_%d(std )\t%s' % (i+1, '\t'.join(thts_i_str[1::2])))
def introduce_inputs_override(self, num, introduced_stock_changes, introduced_specified_stock, introduced_specified_sales, decimals=10):
    list_steps = range(self.i, self.num_years*self.spy) if num is None else range(self.i, min(self.i+num*self.spy, self.num_years*self.spy))

    if introduced_stock_changes is not None:
        if num != len(util.ensure_iterable_and_not_string(introduced_stock_changes)):
            raise ValueError("length of annual stock_changes must match the number of years to run")
        if np.any(np.isnan(introduced_stock_changes)):
            raise ValueError("introduced annual stock_changes cannot be nan")
        self.stock_changes[list_steps] = np.reshape(np.repeat(introduced_stock_changes/self.spy, self.spy, axis=0), len(list_steps))

    if introduced_specified_stock is not None:
        if num != len(util.ensure_iterable_and_not_string(introduced_specified_stock)):
            raise ValueError("length of annual specified_stock must match the number of years to run")
        self.specified_stock[list_steps] = np.array(util.flatten_list([[[np.nan]*self.num_techs]*(self.spy-1) + [list(util.ensure_iterable_and_not_string(ss))] for ss in np.round(introduced_specified_stock, decimals)]))
        if np.any(self.specified_stock[list_steps] < 0):
            raise ValueError("introduced specified stock cannot be negative")
        i = self.i
        self.prior_year_stock = self.initial_stock if i == 0 else np.sum(self.stock[:, :i + 1, i - 1], axis=1)
        if np.sum(self.prior_year_stock) + np.sum(self.stock_changes[list_steps]) > np.nansum(self.specified_stock[list_steps]) and not np.all(np.isnan(self.specified_stock[list_steps])) and self.stock_changes_as_min and self.use_stock_changes:
            self.specified_stock[list_steps] *= (np.sum(self.prior_year_stock) + np.sum(self.stock_changes[list_steps]))/np.nansum(self.specified_stock[list_steps])
        # self.stock_changes[list_steps] = np.reshape(np.repeat(0/self.spy, self.spy, axis=0), len(list_steps))
        # self.specified_stock[list_steps] = np.reshape(np.repeat(introduced_specified_stock, self.spy, axis=0), len(list_steps))

    if introduced_specified_sales is not None:
        if num != len(util.ensure_iterable_and_not_string(introduced_specified_sales)):
            raise ValueError("length of annual specified_sales must match the number of years to run")
        self.specified_sales[list_steps] = np.reshape(np.repeat(np.round(introduced_specified_sales, decimals)/self.spy, self.spy, axis=0), len(list_steps))
        if np.any(self.specified_sales[list_steps] < 0):
            raise ValueError("introduced specified sales cannot be negative")
def get_labels(authors, POS):
    """Returns true labels for list of authors.

    Given a list of authors, returns a list of integers representing the
    documents for all documents written by an author in your author list.
    These integers represent 'true labels'.

    Args:
        authors (list): A list of strings of Author names.
        POS (boolean): A boolean value representing whether you want the
            original document or the POS-converted document.

    Returns:
        list: The length of this list is equal to the total number of
            documents written by authors in your authors list. These integers
            are subject to the requirement that any two documents written by
            the same author will be represented by the same integer.
    """
    author_lengths = [
        len(i) for i in [auth_paths(auth, POS) for auth in authors]
    ]
    unflattened_labels = [
        author_length * [i]
        for author_length, i in [(author_lengths[i], i)
                                 for i in range(len(author_lengths))]
    ]
    return flatten_list(unflattened_labels)
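# Illustration with a hypothetical corpus: if auth_paths returns 2 documents
# for "Austen" and 3 for "Dickens", then
#   get_labels(["Austen", "Dickens"], POS=False) == [0, 0, 1, 1, 1]
# i.e. every document by the same author receives the same integer label.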
def set_average_net_loads(self, total_net_load):
    df = total_net_load.copy()
    df['period'] = util.flatten_list([[p]*self.period_lengths[p] for p in self.periods])*len(cfg.dispatch_geographies)
    df = df.set_index(['period'], append=True)
    self.period_net_load = df.groupby(level=[self.dispatch_geography, 'period']).sum().squeeze().to_dict()
    self.average_net_load = df.groupby(level=[self.dispatch_geography]).sum()/float(len(self.periods))
    self.average_net_load = self.average_net_load[self.average_net_load.columns[0]].to_dict()
def handle_while(block, instance_vars, stack):
    validate_while_loop_syntax(block)
    tokens = clean_up_list_elems(flatten_list(re.split("while\s*\(", block)))
    tokens = clean_up_list_elems(flatten_list(list(map(lambda x: re.split("\)\s*\{", x), tokens))))
    tokens = clean_up_list_elems(flatten_list(list(map(lambda x: re.split("\}", x), tokens))))
    print("tokens: " + str(tokens))
    assert len(tokens) == 2, "There should be 2 tokens, but there are " + str(len(tokens))
    condition, statements = tokens[0], tokens[1]
    while evaluate_expression(condition, instance_vars, stack):
        parse_eval(statements, instance_vars, stack)  # We will NOT support different scoping for variables inside.
    return  # Call parse_eval with instance_vars
def set_average_net_loads(self, total_net_load):
    df = total_net_load.copy()
    df['period'] = util.flatten_list([[p]*self.period_lengths[p] for p in self.periods])*len(GeoMapper.dispatch_geographies)
    df = df.set_index(['period'], append=True)
    self.period_net_load = df.groupby(level=[self.dispatch_geography, 'period']).sum().squeeze().to_dict()
    self.average_net_load = df.groupby(level=[self.dispatch_geography]).sum()/float(len(self.periods))
    self.average_net_load = self.average_net_load[self.average_net_load.columns[0]].to_dict()
def __check_ev_paths__(self, EvFilesPath, SkipEvFiles, EvPatternsPath, SkipEvPatterns):
    ev_files_list, ev_patterns_list = [], []
    if self.chk_subdirectories:
        __get_dir_contents__ = lambda a_dir: util.SysCommand('find %s' % a_dir)
    else:
        __get_dir_contents__ = lambda a_dir: util.GetFileListing(os.path.join(a_dir, '*.*'), '-1')
    get_files = lambda search_expr, a_dir: util.keep_in_list(__get_dir_contents__(a_dir).output, search_expr)
    if not SkipEvFiles:
        ev_files_list = util.unique_sub_list(util.flatten_list(
            [get_files(self.EvFileExpr, a_dir) for a_dir in EvFilesPath]))
        if len(ev_files_list) > 0:
            setattr(self, 'ev_files_list', ev_files_list)
    if not SkipEvPatterns:
        ev_patterns_list = util.unique_sub_list(util.flatten_list(
            [get_files(self.EvPatternExpr, a_dir) for a_dir in EvPatternsPath]))
        if len(ev_patterns_list) > 0:
            setattr(self, 'ev_patterns_list', ev_patterns_list)
    return ((len(ev_files_list) + len(ev_patterns_list)) > 0)
def set_timeperiods(self):
    """sets optimization periods based on selection of optimization hours in the dispatch configuration
    sets:
      period_hours = range from 1 to the number of opt_hours
      periods = range from 1 to the maximum number of periods (i.e. 8760/period_hours)
      period_timepoints = dictionary with keys of period and values of period hours
      period_flex_load_timepoints = dictionary with keys of period and values of a nested dictionary with the keys of period_hours
      and the values of those period hours offset by the flexible_load_constraint_offset configuration parameter
    """
    if hasattr(self, 'hours'):
        return
    self.num_hours = len(shape.shapes.active_dates_index)
    self.hours = range(self.num_hours)
    num_periods = int(round(self.num_hours / float(cfg.opt_period_length)))
    self.periods = range(num_periods)
    split_hours = [list(a) for a in np.array_split(self.hours, num_periods)]  # splits into roughly equal lengths
    self.period_lengths = dict(zip(self.periods, [len(a) for a in split_hours]))
    self.period_timepoints = dict(zip(self.periods, split_hours))
    self.period_previous_timepoints = dict(zip(self.periods, [dict(zip(*(a, util.rotate(a, 1)))) for a in split_hours]))
    self.period_repeated = util.flatten_list([[p] * self.period_lengths[p] for p in self.periods])
def detect_if_king_is_mate(colour, pieces):
    possible_king = filter(lambda piece: piece.letter == 'K' and piece.colour == colour, pieces)
    if len(possible_king) == 0:
        return False
    analyze_threats_on_board(pieces)
    pieces_of_this_colour = filter(lambda piece: piece.colour == colour, pieces)
    piece_moves_all_move_results = map(lambda piece: piece.inspect_moves_for_piece(pieces, all_chess_coords), pieces_of_this_colour)

    def any_valid_move_available(piece, moves_move_results):
        def check_for_valid(move, move_result):
            return move_result.is_valid_move
        return map(lambda move_move_result: check_for_valid(move_move_result['move'], move_move_result['move_result']), moves_move_results)

    any_valid_move = map(lambda piece_moves_move_results: any_valid_move_available(piece_moves_move_results['piece'], piece_moves_move_results['moves_move_result']), piece_moves_all_move_results)
    any_valid_move_flat = util.flatten_list(any_valid_move)
    valid_move_exists = (True in any_valid_move_flat)
    return not valid_move_exists
def __init__(self):
    self.geographies = OrderedDict()
    self.geography_names = dict(util.sql_read_table('GeographiesData', ['id', 'name'], return_unique=True, return_iterable=True))  # this is used for outputs
    self.timezone_names = {}
    self.map_keys = []
    self.read_geography_indicies()
    self.gau_to_geography = dict(util.flatten_list([(v, k) for v in vs] for k, vs in self.geographies.iteritems()))
    self.id_to_geography = dict((k, v) for k, v in util.sql_read_table('Geographies'))
    self.read_geography_data()
    self._create_composite_geography_levels()
    self.geographies_unfiltered = copy.copy(self.geographies)  # keep a record
    self._update_geographies_after_subset()
def create_samples_xy_rnn_list(input_x_list, input_y_list, lags, transpose):
    if type(input_x_list) == np.ndarray:
        input_x_list = [input_x_list]
    if type(input_y_list) == np.ndarray:
        input_y_list = [input_y_list]
    dataX, dataY = [], []
    for x, y in zip(input_x_list, input_y_list):
        print(len(x))
        temp_x, temp_y = create_samples_xy_rnn(x, y, lags, transpose)
        dataX.append(temp_x)
        dataY.append(temp_y)
    return flatten_list_3d(dataX), flatten_list(dataY)
def handle_for(block, instance_vars, stack):
    validate_for_loop_syntax(block)
    tokens = clean_up_list_elems(flatten_list(re.split("for\s*\(", block)))
    tokens = clean_up_list_elems(flatten_list(list(map(lambda x: re.split("\)\s*\{", x), tokens))))
    tokens = clean_up_list_elems(flatten_list(list(map(lambda x: re.split("\}", x), tokens))))
    print("tokens: " + str(tokens))
    tokens = tokens[0].split(";") + [tokens[1]]
    print("tokens: " + str(tokens))
    initialize, condition, update, statements = tokens
    print("tokens: " + str(tokens))
    assign_variable(initialize, instance_vars, stack)
    evaluated_condition = evaluate_expression(condition, instance_vars, stack)
    if type(evaluated_condition) is not bool:
        raise InvalidForLoopException("Boolean condition is of wrong type")
    # Re-evaluate the condition each iteration so the loop can terminate.
    while evaluate_expression(condition, instance_vars, stack):
        parse_eval(statements, instance_vars, stack)
        assign_variable(update, instance_vars, stack)
    return
def __get_dest_sub_dirs__(self, path_list):
    chk_list = [[j for j in xrange(len(path_list)) if j != i] for i, a_path in enumerate(path_list)]
    common_index = min(
        min(
            max(j for j, a_dir, b_dir in zip(util.InfiniteCounter(start=-1), a_path.split('/'), path_list[i].split('/'))
                if cmp(b_dir, a_dir) == 0)
            for i in a_range)
        for a_range, a_path in zip(chk_list, path_list))
    ci = common_index + 1
    tree = '/'.join(path_list[0].split('/')[:ci]) + '/'
    branches = util.unique_sub_list([a_path.replace(tree, '').split('/')[0] for a_path in path_list])
    return util.flatten_list([[a_path.replace(tree, '') for a_path in path_list
                               if cmp(a_branch, a_path.split('/')[ci]) == 0] for a_branch in branches])
def setUp(self):
    self.res = RESOLUTIONS['TESTBIG']
    db = DoubleBuffer(self.res.words_per_line + 1, read_domain='sync', write_domain='sync')
    self.db_write = db.write
    self.add(db, 'db')
    self.vt = VideoTimer(self.res)
    self.add(self.vt, 'vt')
    self.reader = DoubleBufferReaderRGB(self.vt, db.read)
    self.add(self.reader, 'reader')

    # list of frames
    # each frame has 44 lines of 4 words
    def make_frame(c):
        return [[c * 0x1000 + j * 0x10 + i for i in range(4)] for j in range(44)]

    self.frames = [make_frame(c + 1) for c in range(3)]
    self.bits = all_bits_list(flatten_list(flatten_list(self.frames)))
    self.extra_processes.append(self.writer)
def get_latest_videos_from_channel_ids(channel_ids):
    max_results = 50
    videos = []
    for channel_id in channel_ids:
        channel_videos = VIDEOS_CACHE.get_from_cache(channel_id)
        if channel_videos is None:
            channel_videos = edict(_search_videos(channel_id=channel_id, max_results=max_results))
            # channel_videos = edict(load_file('sample_channel_videos_response.json'))
            channel_videos = VIDEOS_CACHE.add_to_cache(key=channel_id, data=process_video_records(channel_videos), duration=FOUR_HOURS_IN_SECONDS)
        videos.append(channel_videos)
    return flatten_list(videos)
def import_data(data, index, dtype, step=1000):
    data_to_import = zip([{
        "create": {
            "_index": index,
            "_type": dtype,
            "_id": i
        }
    } for i in range(len(data))], data)
    data_to_import = list(flatten_list(map(list, data_to_import)))
    count = 0
    while count < len(data_to_import):
        es.bulk(body=data_to_import[count:count + step], refresh=True)
        count += step
    print('Loaded {} records to elasticsearch'.format(len(data)))
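# Illustration of the bulk body built above (the documents are hypothetical):
# the flattened list interleaves one action line with one source document,
# which is the layout the Elasticsearch bulk API expects. Note that step
# should stay even so a slice never separates an action from its document:
#   [{"create": {"_index": "idx", "_type": "doc", "_id": 0}}, {"field": "a"},
#    {"create": {"_index": "idx", "_type": "doc", "_id": 1}}, {"field": "b"}]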
def rng_process(self):
    yield Passive()
    # Set new data whenever enable is set
    for word in flatten_list(self.rng_data):
        yield self.lw.rng_in.eq(word)
        yield
        while not (yield self.lw.rng_enable):
            yield
        yield  # Allow one more enable
    # That's all the data we have
    while not (yield self.lw.rng_enable):
        yield
    fail("Requested more random numbers than expected")
def initialize_specified_stock(self, specified_stock):
    """ Stock gets specified in the past period of the year """
    shape = (self.num_years * self.spy, self.num_techs)
    if specified_stock is None:
        self.specified_stock = np.empty(shape)
        self.specified_stock.fill(np.nan)
    else:
        self.specified_stock = np.array(
            util.flatten_list(
                [[[np.nan] * self.num_techs] * (self.spy - 1) +
                 [list(util.ensure_iterable(ss))] for ss in specified_stock]))
    if np.any(self.specified_stock < -1E-9):
        raise ValueError(
            "Specified stock cannot be initialized with negative numbers")
    self.specified_stock = np.clip(self.specified_stock, 0, None)
def initialize_specified_stock(self, specified_stock):
    """ Stock gets specified in the past period of the year """
    shape = (self.num_years*self.spy, self.num_techs)
    if specified_stock is None:
        self.specified_stock = np.empty(shape)
        self.specified_stock.fill(np.nan)
    else:
        self.specified_stock = np.array(util.flatten_list([[[np.nan]*self.num_techs]*(self.spy-1) + [list(util.ensure_iterable_and_not_string(ss))] for ss in specified_stock]))
        # if self.spy == 1:
        #     self.specified_stock = specified_stock
        # else:
        #     self.specified_stock = np.array(util.flatten_list([[[np.nan]*self.num_techs]*(self.spy-1) + [list(util.ensure_iterable_and_not_string(ss))] for ss in specified_stock]))
        # if self.num_techs == 1:
        #     self.specified_stock = self.specified_stock.flatten()
    if np.any(self.specified_stock < 0):
        raise ValueError("Specified stock cannot be initialized with negative numbers")
def run():
    email_leads = util.read_email_excel_leads()
    email_leads = fulgencio.filter_results(email_leads)
    emails = [
        util.get_list_from_print(string_list)
        for string_list in list(email_leads['emails'])
    ]
    emails = util.flatten_list(emails)
    with open('email_body.txt', 'r', encoding='latin-1') as email_body:
        with open('email_subject.txt', 'r', encoding='latin-1') as email_subject:
            if not DEBUG:
                mail_gun_post(emails, email_subject.read(), email_body.read())
                fulgencio.save_leads_in_api(email_leads)
            print('Se enviaron: ', len(email_leads['emails']), 'correos')
def get_matrix(authors, POS, ngram_range):
    """Builds design matrix and runs TFIDF.

    Given a list of authors, builds a design matrix on n-gram feature sets
    over all documents (either POS or words) for all authors.

    Args:
        authors (list): A list of strings of Author names.
        POS (boolean): A boolean value representing whether you want the
            original documents or the POS-converted documents.

    Returns:
        numpy.ndarray: The vectorized documents fetched by auth_paths.
            M = Number of Features
            N = Number of documents
            The matrix value at (n, m) represents the number of occurrences of
            the mth feature (n-gram) in the nth document, re-weighted by TFIDF.
    """
    paths = [auth_paths(i, POS) for i in authors]
    vectorizer = TfidfVectorizer(input='filename', ngram_range=ngram_range)
    return vectorizer.fit_transform(flatten_list(paths)).toarray()
def supervised_improvement(matrix, cluster_cores):
    """Classifies the documents based on core elements.

    Clusters the documents represented by the design matrix using core
    elements listed in cluster_cores.

    Args:
        matrix (numpy.ndarray): A design matrix representing features of each
            document.
        cluster_cores (list): A list of cluster cores to be used by the
            classifier.

    Returns:
        list: The predicted label for each document by a Random Forest
            classifier, having been trained on the cluster cores found from
            Spectral Clustering.
    """
    y = flatten_list([[i] * len(cluster_cores[i]) for i in range(len(cluster_cores))])
    matrix_trained = np.vstack([matrix[core] for core in cluster_cores])
    clf = random_forest()
    clf.fit(matrix_trained, y)
    return clf.predict(matrix)
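# Illustration with hypothetical core indices: with cluster_cores == [[3, 7], [1]]
# the training labels become y == [0, 0, 1] and matrix_trained stacks rows 3, 7
# and 1 of the design matrix, so the forest is fit only on each cluster's core
# documents and then predicts a label for every document in the matrix.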
def train_model_and_eval(lvl2_label_name, data, true_positives_all, true_negatives_all, params):
    """Will train, evaluate and update the true positive nested_list"""
    (xtrain_array, ytrain_array, xtest_array, y_test) = data
    """
    models.cross_validate(model=models.level2_model,
                          epochs=epochs,
                          batch_size=batch_size,
                          num_dimensions=num_dimensions,
                          seed=seed,
                          xtrain_array=xtrain_array,
                          ytrain_array=y_train)
    """
    # --------------------- EVAL RESULTS ---------------------
    m2 = train_model(params=params, data=(xtrain_array, ytrain_array, xtest_array, y_test))
    flatten_test_prediction = util.flatten_list(m2.predict(xtest_array))
    scaled_prediction = util.scale_score_by_frequency(flatten_test_prediction, ytrain_array)
    result.print_partial_results(lvl2_label_name, y_test, scaled_prediction, true_positives_all, true_negatives_all)
def add_has_valence_extension_to_matches(matches):
    for match in matches:
        tokens = util.flatten_list(match.values())
        for token in tokens:
            if token._.valence:
                setattr(token._, 'has_valence', True)
def eval(e, thts, env={}):
    """
    Args:
    - e    : Expr
    - thts : float array
    - env  : (str -> float) dict
    Returns:
    - retvl : float
    - logpq : float
    - glogq : float list
    - xs    : float list
    where
    - env[var_str] = return value of Var(var_str)
    - retvl = return value
    - logpq = log p(xs,Y) - log q_thts(xs)
    - glogq = \grad_\THT log q_\THT(xs) |_{\THT=thts}
    - xs    = sampled values
    here capital math symbols denote vectors.
    """
    if isinstance(e, Cnst):
        retvl = e.c
        logpq = 0.0
        glogq = []
        xs = []

    elif isinstance(e, Var):
        assert(e.v in env)
        retvl = env[e.v]
        logpq = 0.0
        glogq = []
        xs = []

    elif isinstance(e, Linear):
        retvl = e.c0 + sum([ci*env[vi] for (ci, vi) in e.cv_l])
        logpq = 0.0
        glogq = []
        xs = []

    elif isinstance(e, App):
        # recursive calls
        num_args = len(e.args)
        (retvl_sub, logpq_sub, glogq_sub, xs_sub)\
            = zip(*[eval(e.args[i], thts, env) for i in range(num_args)])
        # compute: all
        op = App.OP_DICT[num_args][e.op]
        retvl = op(*[retvl_sub[i] for i in range(num_args)])
        logpq = np.sum(logpq_sub)
        glogq = util.flatten_list(glogq_sub)
        xs = util.flatten_list(xs_sub)

    elif isinstance(e, If):
        # recursive calls
        (retvl_1, logpq_1, glogq_1, xs_1) = eval(e.e1, thts, env)
        (retvl_r, logpq_r, glogq_r, xs_r) = (eval(e.e2, thts, env) if retvl_1 > 0 else
                                             eval(e.e3, thts, env))
        # compute: all
        retvl = retvl_r
        logpq = logpq_1 + logpq_r
        if retvl_1 > 0:
            glogq = glogq_1 + glogq_r + [0.]*get_num_thts(e.e3)
        else:
            glogq = glogq_1 + [0.]*get_num_thts(e.e2) + glogq_r
        xs = xs_1 + xs_r

    elif isinstance(e, Let):
        # recursive calls
        (retvl_1, logpq_1, glogq_1, xs_1) = eval(e.e1, thts, env)
        env_new = util.copy_add_dict(env, {e.v1.v: retvl_1})
        (retvl_2, logpq_2, glogq_2, xs_2) = eval(e.e2, thts, env_new)
        # compute: all
        retvl = retvl_2
        logpq = logpq_1 + logpq_2
        glogq = glogq_1 + glogq_2
        xs = xs_1 + xs_2

    elif isinstance(e, Sample):
        # recursive calls
        (retvl_1, logpq_1, glogq_1, xs_1) = eval(e.e1, thts, env)
        (retvl_2, logpq_2, glogq_2, xs_2) = eval(e.e2, thts, env)
        # compute: x_3
        stind = e.stind['thts']
        x_3 = np.random.normal(thts[stind], util.softplus(thts[stind+1]))  # do sampling
        # compute: log p(x|p_loc,p_scale) - log q(x|q_loc,q_scale)
        (p_loc, p_scale) = (retvl_1, retvl_2)
        (q_loc, q_scale) = (thts[stind], util.softplus(thts[stind+1]))
        logpq_3 = (scipy.stats.norm.logpdf(x_3, p_loc, p_scale) -
                   scipy.stats.norm.logpdf(x_3, q_loc, q_scale))
        # compute: \grad_\tht log q_\tht(x) |_{x=x_3, \tht=thts[stind:stind+2]}
        glogq_3 = list(grad_norm_logpdf_tht(x_3, thts[stind:stind+2]))
        # compute: all
        retvl = x_3
        logpq = logpq_1 + logpq_2 + logpq_3
        glogq = glogq_1 + glogq_2 + glogq_3
        xs = xs_1 + xs_2 + [x_3]

    elif isinstance(e, Fsample):
        # recursive calls
        (retvl_1, logpq_1, glogq_1, xs_1) = eval(e.e1, thts, env)
        (retvl_2, logpq_2, glogq_2, xs_2) = eval(e.e2, thts, env)
        # compute: all
        retvl = np.random.normal(retvl_1, retvl_2)  # do sampling
        logpq = logpq_1 + logpq_2
        glogq = glogq_1 + glogq_2
        xs = xs_1 + xs_2

    elif isinstance(e, Observe):
        # recursive calls
        num_args = len(e.args)
        (retvl_sub, logpq_sub, glogq_sub, xs_sub)\
            = zip(*[eval(e.args[i], thts, env) for i in range(num_args)])
        # compute: log p(c|p_loc,p_scale)
        dstr_logpdf = Observe.DSTR_DICT[e.dstr]
        logpq_cur = dstr_logpdf(e.c1.c, *[retvl_sub[i] for i in range(num_args)])
        # compute: all
        retvl = e.c1.c
        logpq = np.sum(logpq_sub) + logpq_cur
        glogq = util.flatten_list(glogq_sub)
        xs = util.flatten_list(xs_sub)

    else:
        assert(False)

    return (retvl, logpq, glogq, xs)
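# Hedged sketch of the gradient helper called above: grad_norm_logpdf_tht is
# not shown in this collection, so the version below is only the standard
# derivation under the assumption that tht = [mu, s] parameterizes
# q = Normal(mu, softplus(s)):
#   log q(x) = -0.5*log(2*pi) - log(sigma) - (x - mu)**2 / (2*sigma**2),  sigma = softplus(s)
#   d/dmu log q(x) = (x - mu) / sigma**2
#   d/ds  log q(x) = sigmoid(s) * (-1/sigma + (x - mu)**2 / sigma**3)
import numpy as np
from scipy.special import expit as sigmoid


def grad_norm_logpdf_tht_sketch(x, tht):
    mu, s = tht[0], tht[1]
    sigma = np.logaddexp(0.0, s)  # softplus(s) = log(1 + exp(s))
    dmu = (x - mu) / sigma**2
    ds = sigmoid(s) * (-1.0 / sigma + (x - mu)**2 / sigma**3)
    return np.array([dmu, ds])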
def get_all_squares_the_enemy_threatens(self, pieces):
    enemy_pieces = filter(lambda piece: piece.colour != self.colour, pieces)
    squares_unflattened = map(lambda piece: piece.is_threat_to_these_squares, enemy_pieces)
    return flatten_list(squares_unflattened)
from board_parts import NUM_ROWS, NUM_COLS, GridCoord, grid_coord_to_chess_coord
import util


def make_row(columns):
    return lambda row_num: zip(([row_num] * len(columns)), columns)


all_coords = util.flatten_list(map(make_row(NUM_COLS), NUM_ROWS))
all_grid_coords = map(lambda coords: GridCoord(coords[0], coords[1]), all_coords)
all_chess_coords = map(grid_coord_to_chess_coord, all_grid_coords)
def do_sample(e, thts, env={}):
    """
    Summary: do sampling for Sample and Fsample
    Args:
    - e    : Expr
    - thts : float array
    - env  : (str -> float) dict
    Returns:
    - retvl : float
    - xs_s  : float list
    - xs_f  : float list
    where
    - env[var_str] = return value of Var(var_str) as float
    - retvl = return value
    - xs_s  = sampled values for Sample (from approximating distribution)
    - xs_f  = sampled values for Fsample (from prior distribution)
    """
    if isinstance(e, Cnst):
        retvl = e.c
        xs_s = []
        xs_f = []

    elif isinstance(e, Var):
        assert (e.v in env)
        retvl = env[e.v]
        xs_s = []
        xs_f = []

    elif isinstance(e, Linear):
        retvl = e.c0 + sum([ci * env[vi] for (ci, vi) in e.cv_l])
        xs_s = []
        xs_f = []

    elif isinstance(e, App):
        # recursive calls
        num_args = len(e.args)
        (retvl_sub, xs_s_sub, xs_f_sub)\
            = zip(*[do_sample(e.args[i], thts, env) for i in range(num_args)])
        # compute: all
        op = App.OP_DICT[num_args][e.op]
        retvl = op(*[retvl_sub[i] for i in range(num_args)])
        xs_s = util.flatten_list(xs_s_sub)
        xs_f = util.flatten_list(xs_f_sub)

    elif isinstance(e, If):
        # recursive calls
        (retvl_1, xs_s_1, xs_f_1) = do_sample(e.e1, thts, env)
        e_next = e.e2 if retvl_1 > 0 else e.e3
        (retvl_r, xs_s_r, xs_f_r) = do_sample(e_next, thts, env)
        # compute: all
        retvl = retvl_r
        xs_s = xs_s_1 + xs_s_r
        xs_f = xs_f_1 + xs_f_r

    elif isinstance(e, Let):
        # recursive calls
        (retvl_1, xs_s_1, xs_f_1) = do_sample(e.e1, thts, env)
        env_new = util.copy_add_dict(env, {e.v1.v: retvl_1})
        (retvl_2, xs_s_2, xs_f_2) = do_sample(e.e2, thts, env_new)
        # compute: all
        retvl = retvl_2
        xs_s = xs_s_1 + xs_s_2
        xs_f = xs_f_1 + xs_f_2

    elif isinstance(e, Sample):
        # recursive calls
        (retvl_1, xs_s_1, xs_f_1) = do_sample(e.e1, thts, env)
        (retvl_2, xs_s_2, xs_f_2) = do_sample(e.e2, thts, env)
        # sample: x_3 from approximating distribution
        stind = e.stind['thts']
        x_3 = np.random.normal(thts[stind], util.softplus(thts[stind + 1]))
        # compute: all
        retvl = x_3
        xs_s = xs_s_1 + xs_s_2 + [x_3]  # add to xs_s
        xs_f = xs_f_1 + xs_f_2

    elif isinstance(e, Fsample):
        # recursive calls
        (retvl_1, xs_s_1, xs_f_1) = do_sample(e.e1, thts, env)
        (retvl_2, xs_s_2, xs_f_2) = do_sample(e.e2, thts, env)
        # sample: x_3 from prior distribution
        x_3 = np.random.normal(retvl_1, retvl_2)
        # compute: all
        retvl = x_3
        xs_s = xs_s_1 + xs_s_2
        xs_f = xs_f_1 + xs_f_2 + [x_3]  # add to xs_f

    elif isinstance(e, Observe):
        # recursive calls
        num_args = len(e.args)
        (retvl_sub, xs_s_sub, xs_f_sub)\
            = zip(*[do_sample(e.args[i], thts, env) for i in range(num_args)])
        # compute: all
        retvl = e.c1.c
        xs_s = util.flatten_list(xs_s_sub)
        xs_f = util.flatten_list(xs_f_sub)

    else:
        assert (False)

    return (retvl, xs_s, xs_f)
def export_results_to_db(self):
    scenario_run_id = util.active_scenario_run_id(self.scenario_id)
    # Levelized costs
    costs = self.outputs.c_costs.groupby(level=['SUPPLY/DEMAND', 'YEAR']).sum()
    util.write_output_to_db(scenario_run_id, 1, costs)

    # Energy
    energy = self.outputs.c_energy.xs('FINAL', level='ENERGY ACCOUNTING')\
        .groupby(level=['SECTOR', 'FINAL_ENERGY', 'YEAR']).sum()
    # Energy demand by sector
    util.write_output_to_db(scenario_run_id, 2, energy.groupby(level=['SECTOR', 'YEAR']).sum())
    # Residential Energy by Fuel Type
    util.write_output_to_db(scenario_run_id, 6, energy.xs('RESIDENTIAL', level='SECTOR'))
    # Commercial Energy by Fuel Type
    util.write_output_to_db(scenario_run_id, 8, energy.xs('COMMERCIAL', level='SECTOR'))
    # Transportation Energy by Fuel Type
    util.write_output_to_db(scenario_run_id, 10, energy.xs('TRANSPORTATION', level='SECTOR'))
    # Productive Energy by Fuel Type
    util.write_output_to_db(scenario_run_id, 12, energy.xs('PRODUCTIVE', level='SECTOR'))

    # Emissions
    emissions = self.outputs.c_emissions.xs('DOMESTIC', level='EXPORT/DOMESTIC')\
        .groupby(level=['SECTOR', 'FINAL_ENERGY', 'YEAR']).sum()
    emissions = util.DfOper.mult((emissions, 1-(emissions.abs()<1E-10).groupby(level='FINAL_ENERGY').all()))  # get rid of noise
    # Annual emissions by sector
    util.write_output_to_db(scenario_run_id, 3, emissions.groupby(level=['SECTOR', 'YEAR']).sum())
    # Residential Emissions by Fuel Type
    util.write_output_to_db(scenario_run_id, 7, emissions.xs('RESIDENTIAL', level='SECTOR'))
    # Commercial Emissions by Fuel Type
    util.write_output_to_db(scenario_run_id, 9, emissions.xs('COMMERCIAL', level='SECTOR'))
    # Transportation Emissions by Fuel Type
    util.write_output_to_db(scenario_run_id, 11, emissions.xs('TRANSPORTATION', level='SECTOR'))
    # Productive Emissions by Fuel Type
    util.write_output_to_db(scenario_run_id, 13, emissions.xs('PRODUCTIVE', level='SECTOR'))

    # Domestic emissions per capita
    annual_emissions = self.outputs.c_emissions.xs('DOMESTIC', level='EXPORT/DOMESTIC').groupby(level=['YEAR']).sum()
    population_driver = self.demand.drivers[2].values.groupby(level='year').sum().loc[annual_emissions.index]
    population_driver.index.name = 'YEAR'
    factor = 1E6
    df = util.DfOper.divi((annual_emissions, population_driver)) * factor
    df.columns = ['TONNE PER CAPITA']
    util.write_output_to_db(scenario_run_id, 4, df)

    # Electricity supply
    electricity_node_names = [self.supply.nodes[nodeid].name.upper() for nodeid in util.flatten_list(self.supply.injection_nodes.values())]
    df = self.outputs.c_energy.xs('ELECTRICITY', level='FINAL_ENERGY')\
        .xs('EMBODIED', level='ENERGY ACCOUNTING')\
        .groupby(level=['SUPPLY_NODE', 'YEAR']).sum()
    util.write_output_to_db(scenario_run_id, 5, df.loc[electricity_node_names])
def get_matches(pattern, docs):
    matches = [pattern.match(doc) for doc in docs]
    matches = util.flatten_list(matches)
    return matches
token_rows = [util.unpack_json_field(row, 'data') for row in token_rows]
new_training_match_slots = {}
for label, tokens in training_match_slots.items():
    new_training_match_slots[label] = []
    for token in tokens:
        indices_to_check = range(len(token_rows))
        starting_index = token['token_offset']
        indices_behind = [
            idx for idx in indices_to_check if idx < starting_index
        ]
        indices_ahed = [
            idx for idx in indices_to_check if idx > starting_index
        ]
        check_idxs = zip(reversed(indices_behind), indices_ahed)
        check_idxs = util.flatten_list(check_idxs)
        check_idxs.insert(0, starting_index)
        token_found = False
        for idx in check_idxs:
            token_row = token_rows[idx]
            tokens_are_equal = token_row['text'] == token['text']
            if tokens_are_equal:
                token_found = token_row
                break
        if not token_found:
            print('token not found:', label, token)
            raise
        else:
            new_training_match_slots[label].append(token_found)

training_match_feature_dict = {
optional_args = {}
'''
for ARG in ASM_ARGS:
    sp = ARG.split("=")
    if len(sp) != 2:
        raise Exception("Invalid option format " + str(ARG))
    try:
        if sp[0][0] != "-":
            raise Exception("Invalid option format " + str(ARG))
        optional_args[sp[0][1:]] = int(sp[1])
    except ValueError:
        raise Exception("Invalid option format " + str(ARG))
'''
for ARG in util.flatten_list(ARGS["asm_vars"]):
    sp = ARG.split("=")
    if len(sp) != 2:
        raise Exception("Invalid option format " + str(ARG))
    try:
        optional_args[sp[0]] = int(sp[1], 16)
    except ValueError:
        raise Exception("Invalid option format " + str(ARG))
# print(optional_args)

force_assembly = False
if ARGS["force_assemble"] == True: