def test_partition(self):
    given_list = [1, 2, 3, 4, 5, 6, 7, 8]
    indices1 = [1]
    indices2 = [1, 5]
    indices3 = [1, 3, 5]
    ans1 = [[1], [2, 3, 4, 5, 6, 7, 8]]
    ans2 = [[1], [2, 3, 4, 5], [6, 7, 8]]
    ans3 = [[1], [2, 3], [4, 5], [6, 7, 8]]
    self.assertEqual(utils.partition(given_list, indices1), ans1)
    self.assertEqual(utils.partition(given_list, indices2), ans2)
    self.assertEqual(utils.partition(given_list, indices3), ans3)
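# A minimal sketch of an index-based `partition(lst, indices)` consistent with
# the expectations in the test above; the project's actual `utils.partition`
# may be implemented differently. Each index in `indices` starts a new slice.
def partition(lst, indices):
    bounds = [0] + list(indices) + [len(lst)]
    return [lst[i:j] for i, j in zip(bounds, bounds[1:])]

# For example, partition([1, 2, 3, 4, 5, 6, 7, 8], [1, 5])
# returns [[1], [2, 3, 4, 5], [6, 7, 8]].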
def __init__(self, expression, preconds, effects):
    if isinstance(expression, str):
        expression = expr(expression)
    preconds = [expr(p) if not isinstance(p, Expr) else p for p in preconds]
    effects = [expr(e) if not isinstance(e, Expr) else e for e in effects]
    self.name = expression.op
    self.args = expression.args
    self.subst = None
    precond_neg, precond_pos = partition(preconds, is_negative_clause)
    self.precond_pos = set(precond_pos)
    # change the negative Exprs to positive for evaluation
    self.precond_neg = set(e.args[0] for e in precond_neg)
    effect_rem, effect_add = partition(effects, is_negative_clause)
    self.effect_add = set(effect_add)
    # change the negative Exprs to positive for evaluation
    self.effect_rem = set(e.args[0] for e in effect_rem)
def __init_workStack__(self):
    max_depth = max(node.depth for node in self.trimList)
    # print(max_depth)
    it1, it2 = utils.partition(self.trimList, lambda x: x.depth == max_depth)
    self.next_workList = list(it1)
    self.workList = list(it2)
def traverse_ast(self):
    next_workList = []
    curr_depth = 0
    next_depth = -1
    while len(self.workList) > 0:
        node = self.workList.pop(0)
        curr_depth = node.depth
        next_depth = curr_depth - 1
        if utils.isConst(node) or node in self.completed[node.depth]:
            ####
            # for out in self.bwdDeriv[node].keys():
            #     print("Debug-deriv:", type(node).__name__)
            #     print("Node expr:", node.f_expression)
            #     print("Deriv: ", self.bwdDeriv[node][out])
            #     print("---------\n\n")
            ####
            pass
        elif self.converge_parents(node):
            self.visit_node_deriv(node)
        else:
            self.workList.append(node)
        if len(self.workList) == 0 and next_depth != -1 and len(self.next_workList) != 0:
            nextIter, currIter = utils.partition(self.next_workList,
                                                 lambda x: x.depth == next_depth)
            self.workList = list(set(currIter))
            self.next_workList = list(set(nextIter))
def get(self):
    user = g.user
    generate_friend_recs(user)
    count, page, users = self.select().key(skey(user, 'friend', 'suggestions')).execute()
    if count == 0:
        count, page, users = self.select().group(g.group).execute()
    if count == 0:
        count, page, users = self.select().execute()
    fb, other = partition(users, lambda u: hasattr(u, 'score') and u.score >= 10000)
    shuffle(fb)
    shuffle(other)
    users = fb + other
    for u in users:
        if hasattr(u, 'score'):
            score = u.score
            delattr(u, 'score')
            if score >= 10000:
                score -= 10000
                if score < 100000:
                    u.num_friends_in_common = int(score)
                else:
                    u.num_friends_in_common = 0
            else:
                u.num_friends_in_common = 0
    return self.serialize_list(User, users, count, page), 200, {'Cache-Control': 'max-age=60'}
def _get_third_party_python_libs_directory_contents():
    """Returns a dictionary containing all of the normalized library name
    strings with their corresponding version strings installed in the
    'third_party/python_libs' directory.

    Returns:
        dict(str, str). Dictionary with the normalized name of the library
        installed as the key and the version string of that library as the
        value.
    """
    direct_url_packages, standard_packages = utils.partition(
        pkg_resources.find_distributions(common.THIRD_PARTY_PYTHON_LIBS_DIR),
        predicate=lambda dist: dist.has_metadata('direct_url.json'))

    installed_packages = {
        pkg.project_name: pkg.version for pkg in standard_packages
    }

    for pkg in direct_url_packages:
        metadata = json.loads(pkg.get_metadata('direct_url.json'))
        version_string = '%s+%s@%s' % (
            metadata['vcs_info']['vcs'], metadata['url'],
            metadata['vcs_info']['commit_id'])
        installed_packages[pkg.project_name] = version_string

    # Libraries with different case are considered equivalent libraries:
    # e.g 'Flask' is the same library as 'flask'. Therefore, we
    # normalize all library names in order to compare libraries without
    # ambiguities.
    directory_contents = {
        normalize_python_library_name(library_name): version_string
        for library_name, version_string in installed_packages.items()
    }

    return directory_contents
def loop_filter(l, comp):
    rank = 0
    while len(l) > 1:
        o, z = map(list, partition(lambda x: x[rank] == '1', l))
        l = o if comp(o, z) else z
        rank += 1
    return l[0]
def find_best_split(rows):
    """Calculate the information gain of each candidate question and return
    the best question to ask, along with its gain."""
    best_gain = 0
    best_question = None
    current_uncertainty = calculate_gini(rows)
    n_features = len(rows[0]) - 1

    for col in range(n_features):
        values = distinct_attributes(rows, col)
        for val in values:
            question = Question(col, val)
            true_rows, false_rows = partition(rows, question)
            # Skip questions that fail to divide the dataset.
            if len(true_rows) == 0 or len(false_rows) == 0:
                continue
            gain = info_gain(true_rows, false_rows, current_uncertainty)
            if gain >= best_gain:
                best_gain, best_question = gain, question

    return best_gain, best_question
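# Here `partition(rows, question)` splits a dataset on a Question object. A
# minimal sketch under that assumption, using a hypothetical `question.match`
# predicate (as in the classic decision-tree tutorial this code resembles):
def partition(rows, question):
    # Route each row into the matching or non-matching bucket.
    true_rows, false_rows = [], []
    for row in rows:
        (true_rows if question.match(row) else false_rows).append(row)
    return true_rows, false_rows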
def main(self, user_ids: List[str]):
    '''Run the async loop. The argument should be a list of user IDs to
    retrieve in Alma. Returns 1) barcode and other data for users with
    matching records in an IZ, and 2) users with no match in any IZ.'''
    user_data = {}
    # Loop through available Alma API keys in order. Allows querying of multiple IZ's.
    for apikey in self.apikeys:
        # Create request header
        self.headers = {'Authorization': f"apikey {apikey}",
                        'Accept': 'application/json'}
        results = asyncio.run(self._retrieve_user_records(user_ids))
        # Valid results have the record_type key
        errors, results = partition(lambda x: x and 'record_type' in x, results)
        # Extract barcodes and user groups as mapping to user IDs
        user_data.update(self._extract_info(results))
        # Get the remaining user ID's to query
        user_ids = [e['User ID'] for e in errors
                    if e['Error'] == 'User Not Found']
        if not user_ids:
            break
    # Log user ID's that could not be found
    # if user_ids:
    #     self.logger.error(f"Users could not be found in any IZ: {user_ids}")
    return user_data, user_ids
def run(args, seed):
    setup_seed(seed)

    adj, features, labels, idx_train, idx_val, idx_test = load_data(args['dataset'])
    node_num = features.size()[0]
    class_num = labels.numpy().max() + 1

    adj = adj.cuda()
    features = features.cuda()
    labels = labels.cuda()
    loss_func = nn.CrossEntropyLoss()
    early_stopping = 10

    adj_raw = load_adj_raw(args['dataset']).tocoo()
    ss_labels = partition(adj_raw, args['partitioning_num']).cuda()

    net_gcn = net.net_gcn_multitask(embedding_dim=args['embedding_dim'],
                                    ss_dim=args['partitioning_num'])
    net_gcn = net_gcn.cuda()
    optimizer = torch.optim.Adam(net_gcn.parameters(), lr=args['lr'],
                                 weight_decay=args['weight_decay'])

    loss_val = []
    for epoch in range(1000):
        optimizer.zero_grad()
        output, output_ss = net_gcn(features, adj)
        loss_target = loss_func(output[idx_train], labels[idx_train])
        loss_ss = loss_func(output_ss, ss_labels)
        loss = loss_target * args['loss_weight'] + loss_ss * (1 - args['loss_weight'])
        # print('epoch', epoch, 'loss', loss_target.data)
        loss.backward()
        optimizer.step()

        # validation
        with torch.no_grad():
            output, _ = net_gcn(features, adj, val_test=True)
            loss_val.append(loss_func(output[idx_val], labels[idx_val]).cpu().numpy())
            # print('val acc', f1_score(labels[idx_val].cpu().numpy(), output[idx_val].cpu().numpy().argmax(axis=1), average='micro'))

        # early stopping
        if epoch > early_stopping and loss_val[-1] > np.mean(loss_val[-(early_stopping + 1):-1]):
            break

    # test
    with torch.no_grad():
        output, _ = net_gcn(features, adj, val_test=True)
        acc_val = f1_score(labels[idx_val].cpu().numpy(),
                           output[idx_val].cpu().numpy().argmax(axis=1),
                           average='micro')
        acc_test = f1_score(labels[idx_test].cpu().numpy(),
                            output[idx_test].cpu().numpy().argmax(axis=1),
                            average='micro')

    return acc_val, acc_test
def test_partition(self):
    is_even = lambda n: (n % 2) == 0
    evens, odds = utils.partition([10, 8, 1, 5, 6, 4, 3, 7], predicate=is_even)
    self.assertEqual(list(evens), [10, 8, 6, 4])
    self.assertEqual(list(odds), [1, 5, 3, 7])
def test_enumerated_partition(self) -> None:
    logs = ['ERROR: foo', 'INFO: bar', 'INFO: fee', 'ERROR: fie']
    is_error = lambda msg: msg.startswith('ERROR: ')
    errors, others = utils.partition(logs, predicate=is_error, enumerated=True)
    self.assertEqual(list(errors), [(0, 'ERROR: foo'), (3, 'ERROR: fie')])
    self.assertEqual(list(others), [(1, 'INFO: bar'), (2, 'INFO: fee')])
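# A minimal sketch of a predicate-based `utils.partition` consistent with both
# tests above (matching items first, returned lazily); the real helper may
# differ in detail.
import itertools

def partition(iterable, predicate=bool, enumerated=False):
    # With enumerated=True, yield (index, item) pairs and apply the
    # predicate to the item half of each pair.
    items = enumerate(iterable) if enumerated else iterable
    pred = (lambda pair: predicate(pair[1])) if enumerated else predicate
    copy1, copy2 = itertools.tee(items)
    return filter(pred, copy1), itertools.filterfalse(pred, copy2)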
def __init__(self, orig):
    self.orig = orig
    img = self.orig.get_image_data()
    # Group the raw RGB byte stream into (r, g, b) triples.
    self.orig_data = partition([ord(x) for x in img.get_data("RGB", img.width * 3)], 3)
    self.setup()
    self.evolutions = 0
    self.bene_evolutions = 0
def filter_types(types, iterable, verbose=False):
    """Filter events that match several types."""
    known, unknown = partition(lambda event: event.type in types, iterable)
    if verbose:
        for event in unknown:
            print('Warning: Ignored or unknown event type: %s' % event.type)
    return known
def grep_split(pattern, text):
    '''Take the lines in *text* and split them each time the pattern
    matches a line.
    '''
    lines = text.splitlines()
    indices = [i for i, line in enumerate(lines) if re.search(pattern, line)]
    return ['\n'.join(part) for part in partition(lines, indices)]
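# Assuming the index-based partition semantics from the tests earlier (each
# matching line starts a new part), a hypothetical call would behave like:
#
#     grep_split(r'^==', "intro\n== one ==\nalpha\n== two ==\nbeta")
#     # -> ['intro', '== one ==\nalpha', '== two ==\nbeta']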
def von_neumann_vectors(cls, ndim, radius=1, closed=False):
    r"""
    :param ndim: Number of dimensions
    :param radius: Von Neumann neighborhood radius
    :param closed: Should the neighborhood include all points r < radius
        or only r = radius
    :return: Iterator of vectors in the Von Neumann neighborhood,
        :math:`\{v \mid v \in Z^n, \|v\|_1 = r\}`
    """
    neighborhood = (cls(i) for i in partition(radius, ndim))
    if closed and radius > 1:
        return itertools.chain(neighborhood,
                               cls.von_neumann_vectors(ndim, radius - 1, closed))
    return neighborhood
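# In this snippet `partition(radius, ndim)` is yet another operation: it must
# enumerate every integer vector of length `ndim` whose L1 norm equals
# `radius`. A minimal recursive sketch under that assumption (the project's
# actual helper may enumerate in a different order):
def partition(radius, ndim):
    # Base case: a single coordinate carries the whole remaining radius.
    if ndim == 1:
        yield (radius,)
        if radius > 0:
            yield (-radius,)
        return
    # Spend |head| of the radius on the first coordinate, recurse on the rest.
    for head in range(-radius, radius + 1):
        for tail in partition(radius - abs(head), ndim - 1):
            yield (head,) + tail

# e.g. list(partition(1, 2)) -> [(-1, 0), (0, 1), (0, -1), (1, 0)],
# the four unit vectors of the 2-D Von Neumann neighborhood.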
def parse(self, text, lexer, tokenfunc=None, skip_lexerrors=False):
    self.tokenfunc = tokenfunc or (lambda token: token.value)
    lexer.input(text)
    self.tokens, lexerrors = partition(lambda token: not token.is_error,
                                       list(lexer.get_token()))
    if lexerrors and not skip_lexerrors:
        raise ValueError(str(lexerrors[0]))
    tree, i = self.parse_atom(self.start_symbol, 0)
    return tree
def GET(self):
    if session.user is None:
        web.seeother('/')
    elif session.user.position.title in ['Counselor', 'Head Counselor']:
        dates_this_week = dates_of_current_week()
        periods_of_counselor = DBSession().query(Period).outerjoin(Period.entries).\
            filter(Period.date.in_(dates_this_week)).\
            filter(or_(Period.entries.any(counselor_id=session.user.id),
                       Period.entries == None)).\
            order_by(Period.num, Period.date)
        periods_partitioned = partition(periods_of_counselor, lambda p: p.num)
        return render.editweekly(session.user, periods_partitioned, period_labels)
def build_tree(rows):
    gain, question = find_best_split(rows)
    if gain == 0:
        return Leaf(rows)
    true_rows, false_rows = partition(rows, question)
    true_branch = build_tree(true_rows)
    false_branch = build_tree(false_rows)
    return Decision_Node(question, true_branch, false_branch)
def run_filters(self, filters, hostIDs, vmID, properties_map): result = Result() request_id = str(uuid.uuid1()) log_adapter = \ utils.RequestAdapter(self._logger, {'method': 'run_filters', 'request_id': request_id}) # run each filter in a process for robustness log_adapter.info("got request: %s" % str(filters)) avail_f, missing_f = utils.partition(filters, lambda f: f in self._filters) # handle missing filters for f in missing_f: log_adapter.warning("Filter requested but was not found: %s" % f) result.pluginError(f, "plugin not found: '%s'" % f) # Prepare a generator "list" of runners filterRunners = [ PythonMethodRunner( self._pluginDir, self._class_to_module_map[f], f, utils.FILTER, (hostIDs, vmID, properties_map), request_id) for f in avail_f ] for runner in filterRunners: runner.start() log_adapter.debug("Waiting for filters to finish") # TODO add timeout config if utils.waitOnGroup(filterRunners): log_adapter.warning("Waiting on filters timed out") log_adapter.debug("Aggregating results") filters_results = self.aggregate_filter_results(filterRunners, request_id) if filters_results is None: log_adapter.info('All filters failed, return the full list') result.error("all filters failed") filters_results = hostIDs result.add(filters_results) log_adapter.info('returning: %s' % str(filters_results)) return result
def gen_data(n=300, dataset='clusters'):
    classes_n = 4
    if dataset == 'clusters':
        data, targets = datasets.make_classification(n_samples=n, n_features=2,
                                                     n_informative=2, n_redundant=0,
                                                     n_classes=4, class_sep=2.5,
                                                     n_clusters_per_class=1)
    elif dataset == 'circles':
        data, targets = datasets.make_circles(n_samples=n, shuffle=True, noise=0.1,
                                              random_state=None, factor=0.1)
    elif dataset == 'moons':
        data, targets = datasets.make_moons(n_samples=n, shuffle=True, noise=0.2)

    train_data, valid_data, test_data = partition(data, 3)
    train_targets, valid_targets, test_targets = partition(targets, 3)

    train_data = normalize(train_data)
    test_data = normalize(test_data)
    valid_data = normalize(valid_data)

    train_set = to_one_hot_vector_targets(classes_n, zip(train_data, train_targets))
    valid_set = to_one_hot_vector_targets(classes_n, zip(valid_data, valid_targets))
    test_set = to_one_hot_vector_targets(classes_n, zip(test_data, test_targets))
    return train_set, valid_set, test_set
def run_cost_functions(self, cost_functions, hostIDs, vmID, properties_map):
    result = Result()
    request_id = str(uuid.uuid1())
    log_adapter = utils.RequestAdapter(self._logger,
                                       {'method': 'run_cost_functions',
                                        'request_id': request_id})

    # run each cost function in a process for robustness
    log_adapter.info("got request: %s" % str(cost_functions))

    # Get the list of known and unknown score functions
    available_cost_f, missing_cost_f = \
        utils.partition(cost_functions, lambda (n, w): n in self._scores)

    # Report the unknown functions
    for name, weight in missing_cost_f:
        log_adapter.warning("requested but was not found: %s" % name)
        result.pluginError(name, "plugin not found: '%s'" % name)

    # Prepare a generator "list" with runners and weights
    scoreRunners = [
        (PythonMethodRunner(self._pluginDir,
                            self._class_to_module_map[name],
                            name,
                            utils.SCORE,
                            (hostIDs, vmID, properties_map),
                            request_id),
         weight)
        for name, weight in available_cost_f
    ]

    for runner, _weight in scoreRunners:
        runner.start()

    log_adapter.debug("Waiting for scoring to finish")
    if utils.waitOnGroup([runner for runner, _weight in scoreRunners]):
        log_adapter.warning("Waiting on score functions timed out")
        result.error("Waiting on score functions timed out")

    log_adapter.debug("Aggregating results")
    results = self.aggregate_score_results(scoreRunners, request_id)

    result.add(results)
    log_adapter.info('returning: %s' % str(results))
    return result
def extract_relations(tg_corpus, entity_indices, characteristic_indices):
    for g in range(len(tg_corpus)):
        if g in entity_indices or g in characteristic_indices \
                or tg_corpus[g][1] not in \
                ['JJ', 'JJR', 'JJS',                       # adj
                 'NN', 'NNS', 'NNP', 'NNPS',               # noun
                 'VB', 'VBD', 'VBG', 'VBN', 'VBP', 'VBZ',  # verb
                 'RB', 'RBR', 'RBS'                        # adverb
                 ] \
                or tg_corpus[g][1] in ['RB', 'RBR', 'RBS']:
            tg_corpus[g] = None
    partitioned = list(utils.partition(tg_corpus, None))
    return [
        EC.Relation(x[:len(x) - 1], x[len(x) - 1][0]) for x in partitioned
    ]  # TODO: add index support
def get_minibatch(self, minibatch_size):
    """
    Samples a minibatch of configured size from the memory, splits the
    minibatch into two partitions based on the observation being in
    terminal state or not.

    @return: (non_terminal_minibatch, terminal_minibatch)
             If memory size is smaller than the configured minibatch size,
             returns (None, None).
    """
    if self.size() < minibatch_size:
        return None, None
    minibatch = random.sample(self.memory, minibatch_size)
    return utils.partition(lambda x: x[4], minibatch)
def subArray(input, k, start, end):
    if start > end:
        return []
    if start == end:
        if input[start] > k:
            return [input[start]]
        else:
            return []
    index = partition(input, start, end, True)
    s = sum(input, start, index)
    if s <= k:
        return input[start:index + 1] + subArray(input, k - s, index + 1, end)
    else:
        return subArray(input, k, start, index)
def GET(self):
    if session.user is None:
        web.seeother('/')  # they haven't logged in yet
    elif session.user.position.title in ['Counselor', 'Head Counselor']:
        user = session.user
        db_session = DBSession()
        counselor = db_session.query(Counselor).filter(Counselor.id == user.id).first()
        dates_this_week = dates_of_current_week()
        periods_of_counselor = DBSession().query(Period).outerjoin(Period.entries).\
            filter(Period.date.in_(dates_this_week)).\
            filter(or_(Period.entries.any(counselor_id=session.user.id),
                       Period.entries == None)).\
            order_by(Period.num, Period.date)
        periods_partitioned = partition(periods_of_counselor, lambda p: p.num)
        return render.mainpage(user, periods_partitioned, period_labels)
    elif session.user.position.title in ['Secretary']:
        raise web.seeother('/upload')
def run_filters(self, filters, hostIDs, vmID, properties_map): result = Result() request_id = str(uuid.uuid1()) log_adapter = \ utils.RequestAdapter(self._logger, {'method': 'run_filters', 'request_id': request_id}) # run each filter in a process for robustness log_adapter.info("got request: %s" % str(filters)) avail_f, missing_f = utils.partition(filters, lambda f: f in self._filters) # handle missing filters for f in missing_f: log_adapter.warning("Filter requested but was not found: %s" % f) result.pluginError(f, "plugin not found: '%s'" % f) # Prepare a generator "list" of runners filterRunners = [ PythonMethodRunner(self._pluginDir, self._class_to_module_map[f], f, utils.FILTER, (hostIDs, vmID, properties_map), request_id) for f in avail_f ] for runner in filterRunners: runner.start() log_adapter.debug("Waiting for filters to finish") # TODO add timeout config if utils.waitOnGroup(filterRunners): log_adapter.warning("Waiting on filters timed out") log_adapter.debug("Aggregating results") filters_results = self.aggregate_filter_results( filterRunners, request_id) if filters_results is None: log_adapter.info('All filters failed, return the full list') result.error("all filters failed") filters_results = hostIDs result.add(filters_results) log_adapter.info('returning: %s' % str(filters_results)) return result
def reduce(key, values):
    # The reduce() function must be static, so we manually create a "cls"
    # variable instead of changing the function into a classmethod.
    cls = SeedFirebaseOneOffJob
    if key.startswith('SUCCESS:'):
        yield (key, sum(int(v) for v in values))
    elif key == cls.ERROR_BATCH_DELETE:
        reasons, counts = utils.partition(
            values, predicate=lambda value: value.startswith('reason='))
        debug_info = 'count=%d, reasons=[%s]' % (
            sum(int(c) for c in counts),
            ', '.join(sorted({r[7:] for r in reasons})))
        yield (key, debug_info)
    else:
        yield (key, values)
def __init__(self, player):
    if config.fullscreen:
        infoObject = pygame.display.Info()
        self.map_size = (infoObject.current_w, infoObject.current_h)
    else:
        self.map_size = config.size
    self.holes = []
    for i in range(config.num_holes):
        self.holes.append(Hole(self.randpoint()))  # should be less random
    colonies = [(0, Manager(), player), (1, Manager(), Computer())]
    holes_for_colony = utils.partition(self.holes, len(colonies))
    self.colonies = []
    for index, (color, manager, player) in enumerate(colonies):
        holes = holes_for_colony[index]
        colony = Colony(color, holes, manager, player)
        for i in range(config.num_rats):
            colony.add_new_rat(self.randpoint(), random.choice(holes), 0)
        self.colonies.append(colony)
def random_graph(x, y, nb_nodes=3, prob_edge=1, cluster_data=False, rnd_state=None):
    M, _ = x.shape
    # add offset dim
    x_copy = np.c_[x, np.ones(M)]
    # clustering (pass the caller's rnd_state instead of hard-coding None,
    # which left the parameter unused)
    groups = partition(x_copy, y, nb_nodes, cluster_data, random_state=rnd_state)
    nodes = list()
    for i in range(nb_nodes):
        n = Node(i, *groups[i])
        nodes.append(n)
    for i, n in enumerate(nodes):
        neis = [n] + [nodes[j] for j in range(nb_nodes)
                      if i != j and random() < prob_edge]
        n.set_neighbors(neis, [1 / len(neis)] * len(neis))
    return nodes
def parse_downloaded_file(self, file_path):
    print "Parse %s" % file_path
    with codecs.open(file_path, "r", "utf-8") as f:
        lines = [line for line in f]
        splits = [i for i, line in enumerate(lines) if len(line.strip()) == 0]
        tables = partition(lines, splits)
        data = self._csv_lines_to_dict_list(tables[0],
                                            headers=["group", "value", "last_year", "change"])
        params = {}
        for row in tables[2][2:]:
            x = row.split(";")
            key = x[0].replace(":", "")
            value = x[1].strip()
            params[key] = value
        for data_row in data:
            row = merge_dicts(params, data_row)
            self.append(row)
    return self
def line_network(x, y, nb_nodes=3, cluster_data=False):
    M, _ = x.shape
    # add offset dim
    x_copy = np.c_[x, np.ones(M)]
    # clustering
    groups = partition(x_copy, y, nb_nodes, cluster_data)
    nodes = list()
    nei_ids = list()
    for i in range(nb_nodes):
        n = Node(i, *groups[i])
        nei_ids.append([j for j in [i - 1, i, i + 1] if 0 <= j < nb_nodes])
        nodes.append(n)
    for ids, n in zip(nei_ids, nodes):
        n.set_neighbors([nodes[i] for i in ids])
    return nodes
def main():
    # Read the arguments
    args = parse_arguments()

    # Get the class size
    class_size = args.class_size if args.class_size is not None else DEFAULT_CLASS_SIZE

    # Read the features from the test files
    test_files = args.test_files

    # Ensure at least 1 test file is passed in
    if test_files is None:
        print 'Error. Please provide testing feature files'
        exit(1)

    test_data, test_labels = utils.read_features(test_files)
    test_data, test_labels, map = utils.partition(test_data, test_labels, class_size)

    # Read and load the model (default to None so the check below does not
    # raise a NameError when neither flag is given)
    model = None
    if args.svm:
        model = svm.SVM()
        model.load(args.model)
    if args.mlp:
        model = mlp.MLP(10)
        model.load(args.model)

    # Ensure a model was created
    if model is None:
        print 'Error. Model invalid'
        exit(1)

    # Test the model
    predictions = model.predict(test_data)
    accuracy = 1.0 * sum([1 for label, predict in zip(test_labels, predictions)
                          if label == predict]) / len(predictions)

    # Output results
    print 'Accuracy is: ', accuracy
def map(item):
    # The map() function must be static, so we manually create a "cls"
    # variable instead of changing the function into a classmethod.
    cls = SeedFirebaseOneOffJob

    if isinstance(item, cls.ASSOC_MODEL_TYPES):
        admin_ack = cls.get_admin_ack(item)
        if admin_ack is not None:
            yield admin_ack
        else:
            yield (cls.wipe_assoc_model(item), 1)
        return

    yield (cls.INFO_SEED_MODEL_ACK, item.id)

    for user_batch in cls.yield_firebase_user_batches():
        admins_to_ack, users_to_delete = utils.partition(
            user_batch,
            predicate=lambda user: user.email == feconf.ADMIN_EMAIL_ADDRESS)

        for user in admins_to_ack:
            yield ('%s in Firebase account' % cls.INFO_SUPER_ADMIN_ACK,
                   'firebase_auth_id=%s' % (user.uid))

        ids_to_delete = [user.uid for user in users_to_delete]
        try:
            result = firebase_auth.delete_users(ids_to_delete, force_delete=True)
        except Exception as exception:
            yield (cls.ERROR_BATCH_DELETE, len(ids_to_delete))
            yield (cls.ERROR_BATCH_DELETE, 'reason=%r' % exception)
        else:
            for error in result.errors:
                firebase_auth_id = ids_to_delete[error.index]
                debug_info = 'firebase_auth_id=%s, reason=%s' % (
                    firebase_auth_id, error.reason)
                yield (cls.ERROR_INDIVIDUAL_DELETE, debug_info)
            num_deleted = len(ids_to_delete) - len(result.errors)
            if num_deleted:
                yield (cls.SUCCESS_DELETE_ACCOUNTS, num_deleted)
def decode(s, separator, word_len):
    buf = bytearray()
    if word_len == 4:
        words = s.split(separator)
    else:
        words = partition(s, 2)
    for word in words:
        buf.append(decode_word(word, word_len))

    if len(buf) < 5:
        raise ValueError('Invalid Bytewords.')

    # Validate checksum
    body = buf[0:-4]
    body_checksum = buf[-4:]
    checksum = crc32_bytes(body)

    if checksum != body_checksum:
        raise ValueError('Invalid Bytewords.')

    return body
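# Here (and in the RGB-triple grouping earlier) `partition(seq, n)` acts as a
# chunker: it must split a sequence into consecutive pieces of length n. Note
# that other snippets in this collection (e.g. gen_data) use the same name to
# mean "split into n roughly equal parts" instead. A minimal sketch of the
# chunking reading:
def partition(seq, n):
    # The last chunk may be shorter if len(seq) is not a multiple of n.
    return [seq[i:i + n] for i in range(0, len(seq), n)]

# e.g. partition('aeadaolazm', 2) -> ['ae', 'ad', 'ao', 'la', 'zm']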
def _display_results(player_entries: List[str], possible_words: Set[str]) -> None:
    """
    Display the results of a single-player Boggle game.

    Print valid player entries alongside all possible words for comparison.

    :param List[str] player_entries: the list of all player entries
    :param Set[str] possible_words: the set of all valid words found within the grid
    :return: None
    """
    valid_entries, invalid_entries = utils.partition(
        player_entries, lambda e: e in possible_words
    )
    result_join = utils.list_outer_join(valid_entries, list(possible_words))
    result_table = [
        "{:16} {}".format(found or "", possible) for found, possible in result_join
    ]
    print("\n".join(result_table))
    print(
        "You found {} of {} possible words".format(
            len(valid_entries), len(possible_words)
        )
    )
def handle_issue_events(iterable):
    """Report issue changes and comments."""
    lines = []
    unused = iterable
    actions = defaultdict(list)

    # Issues
    events, unused = partition_type('IssuesEvent', unused)
    for event in events:
        number = event.payload['issue'].number
        title = event.payload['issue'].title
        url = event.payload['issue'].html_url
        key = (number, title, url)
        actions[key].append(event.payload['action'])

    # Issue Comments
    events, unused = partition(is_issue_comment, unused)
    for event in events:
        if event.payload['action'] == 'deleted':
            continue
        number = event.payload['issue'].number
        title = event.payload['issue'].title
        url = event.payload['issue'].html_url
        key = (number, title, url)
        actions[key].append('discussed')

    for (number, title, url), actions in sorted(actions.items()):
        did = grammatical_join(list(uniq(actions)))
        tmpl = '{} [issue #{}]({}) - {}'
        lines.append(tmpl.format(did, number, url, title))

    return (lines, unused)
def condProb(cond, pmf):
    result = pmf.Copy()
    for i, j in pmf.Items():
        if i < cond:
            result.Mult(i, 0)
    result.Renormalize()
    return result.Prob(cond)

table = survey.Pregnancies()
table.ReadRecords()
liveBirths = [x for x in table.records if x.outcome == 1]

# Use distinct names for the Pmf objects so the pmf module is not rebound
# (the original reassigned `pmf`, breaking the later MakePmfFromList calls).
prglength_pmf = pmf.MakePmfFromList([p.prglength for p in liveBirths])
print 'Probability of being born at 39 weeks: ', prglength_pmf.Prob(39)
print 'Conditional Probability of being born at 39 weeks if at week 38: ', condProb(39, prglength_pmf)

firstBirths, otherBirths = utils.partition(liveBirths,
                                           lambda p: p.outcome == 1 and p.birthord == 1,
                                           lambda p: p.prglength)

first_pmf = pmf.MakePmfFromList(firstBirths)
xs, ys = first_pmf.Render()
plot.bar(xs, ys, width=0.4, color='white')

other_pmf = pmf.MakePmfFromList(otherBirths)
xs, ys = other_pmf.Render()
plot.bar([x + 0.5 for x in xs], ys, width=0.4, color='blue')
# plot.bar(valsN, probsN, width=0.4, color='blue')
plot.show()
def train_cross_validation(data, labels, models, class_size, k_fold=DEFAULT_K_FOLD):
    """
    This function trains some models and tests them with cross validation.

    Parameters
    ----------
    data: the training data
    labels: the corresponding labels for the training data
    models: the models to train
    class_size: the amount of examples to use per class
    """
    def gen_svm_params():
        params = {}
        params['degree'] = 0
        params['gamma'] = 1
        params['coef'] = 0
        params['C'] = 1
        params['nu'] = 0
        params['p'] = 0
        params['svm_type'] = cv2.ml.SVM_C_SVC
        params['kernel_type'] = cv2.ml.SVM_RBF
        return params

    def gen_mlp_params():
        params = {}
        params['train_method'] = cv2.ml.ANN_MLP_BACKPROP
        params['activation_func'] = cv2.ml.ANN_MLP_SIGMOID_SYM
        params['num_layers'] = 2
        params['num_layer_units'] = 50
        params['moment_scale'] = 0.1
        params['weight_scale'] = 0.1
        return params

    train_error_rates = [[] for model in models]
    test_error_rates = [[] for model in models]
    train_confusion_matrices = [0 for model in models]
    test_confusion_matrices = [0 for model in models]

    # Partition the training data in the set size
    train, train_labels, map = utils.partition(data, labels, True, class_size)

    # Our running accuracies for each K-fold
    avg_train_accuracies = [0 for model in models]
    avg_test_accuracies = [0 for model in models]

    # Auto train params
    params = []
    for model in models:
        if isinstance(model, svm.SVM):
            params.append(gen_svm_params())
        if isinstance(model, mlp.MLP):
            params.append(gen_mlp_params())

    for traincv, testcv in sklearn.cross_validation.KFold(len(train), n_folds=k_fold):
        for index, model in enumerate(models):
            # Train and test classifier
            model.auto_train(train[traincv], train_labels[traincv],
                             params[index], k_folds=k_fold)
            train_predict = model.predict(train[traincv])
            test_predict = model.predict(train[testcv])

            # Calculate accuracy
            train_accuracy = 1.0 * sum([1 for label, predict in
                                        zip(train_labels[traincv], train_predict)
                                        if label == predict]) / len(train_predict)
            test_accuracy = 1.0 * sum([1 for label, predict in
                                       zip(train_labels[testcv], test_predict)
                                       if label == predict]) / len(test_predict)

            # Add to running average
            avg_train_accuracies[index] += train_accuracy
            avg_test_accuracies[index] += test_accuracy

            train_cm = sklearn.metrics.confusion_matrix(train_labels[traincv],
                                                        train_predict,
                                                        labels=np.arange(len(map)))
            test_cm = sklearn.metrics.confusion_matrix(train_labels[testcv],
                                                       test_predict,
                                                       labels=np.arange(len(map)))
            train_confusion_matrices[index] = train_cm
            test_confusion_matrices[index] = test_cm

    # Calculate average accuracy
    avg_train_accuracies = [accuracy / k_fold for accuracy in avg_train_accuracies]
    avg_test_accuracies = [accuracy / k_fold for accuracy in avg_test_accuracies]

    # Print average accuracy
    for index, model in enumerate(models):
        print '\n\tTraining accuracy for ' + model.__class__.__name__ + ': ', avg_train_accuracies[index]
        print '\tTesting accuracy for ' + model.__class__.__name__ + ': ', avg_test_accuracies[index]

    # Calculate error rates
    for index, model in enumerate(models):
        train_error_rates[index].append(1 - avg_train_accuracies[index])
        test_error_rates[index].append(1 - avg_test_accuracies[index])

    # Show confusion matrices and learning curves
    # for index, model in enumerate(models):
    #     print '\n\tTraining confusion matrix for ' + model.__class__.__name__
    #     show_confusion_matrix(train_confusion_matrices[index])
    #     print '\n\tTesting confusion matrix for ' + model.__class__.__name__
    #     show_confusion_matrix(test_confusion_matrices[index])
    #     show_learning_curves(train_error_rates[index], test_error_rates[index], set_sizes)

    # Train and save models
    for index, model in enumerate(models):
        # Train the model
        model.auto_train(train, train_labels, params[index], k_folds=10)

        # Save the model
        now = datetime.datetime.now()
        name = now.strftime("%Y%m%d")[2:] + '_' + model.__class__.__name__ + '.xml'
        model.save(DEFAULT_OUTPUT_PATH + name)

    return models
else:
    topics = params.topics

# Get the documents in Reddit format
print "Retrieving documents..."
reddit_documents = preprocess.getData(topics, params.comment_level,
                                      params.num_docs, params.db)
print reddit_documents

# Preprocess these
print "Preprocessing documents..."
preprocess.preprocess(reddit_documents, params.max_word_length,
                      params.min_word_length, params.stopwords, params.stem)
print reddit_documents

''' TODO: removal_threshold and removal_perc '''

# Split into train and test
print "Splitting into train and test sets..."
train, test = utils.partition(reddit_documents, .9)

# Now save metadata to db to remember parameter configuration
print "Saving metadata to mongodb..."
metadata = utils.createMetaData(params)
result = params.db.add_metadata(metadata)
if result:
    print "Save successful."
else:
    print "Save not successful."

# Print each document to file
# Add metadata's db id to filename to be able to match up to metadata in db
# timestamp = '_'.join(str(datetime.today()).split())
trainpath = "../data/scala/llda_train_" + str(result) + ".csv"
testpath = "../data/scala/llda_test_" + str(result) + ".csv"
def handle_pr_events(iterable, who=None):
    """Report pull request changes and comments."""
    lines = []
    unused = iterable
    actions = defaultdict(list)

    # Pull Requests
    events, unused = partition_type('PullRequestEvent', unused)
    for event in sorted(events, key=lambda event: event.created_at):
        pr = event.payload['pull_request']
        number = pr.number
        title = pr.title
        url = pr.html_url
        user = pr.user.login
        action = event.payload['action']
        if action == 'closed' and pr.to_json()['merged']:
            action = 'merged'
        if user == who:
            action = 'proposed' if action == 'opened' else action
            action = 'rescinded' if action == 'closed' else action
        actions[(number, title, url, user)].append(action)

    # Pull Request General Comments
    events, unused = partition(is_pr_comment, unused)
    for event in sorted(events, key=lambda event: event.created_at):
        if event.payload['action'] == 'deleted':
            continue
        pr = event.payload['issue']
        number = pr.number
        title = pr.title
        url = pr.html_url
        user = pr.user.login
        actions[(number, title, url, user)].append('discussed')

    # Pull Request File / Line Comments
    events, unused = partition_type('PullRequestReviewCommentEvent', unused)
    for event in sorted(events, key=lambda event: event.created_at):
        if event.payload['action'] == 'deleted':
            continue
        pr = event.payload['pull_request']
        number = pr.number
        title = pr.title
        url = pr.html_url
        user = pr.user.login
        actions[(number, title, url, user)].append('discussed')

    for (number, title, url, user), actions in sorted(actions.items()):
        did = grammatical_join(list(uniq(actions)))
        if user == who:
            tmpl = '{} [pull request #{number}]({url}): {title}'
        else:
            tmpl = '{} [pull request #{number}]({url}) by @{user}: {title}'
        lines.append(tmpl.format(did, **locals()))

    return (lines, unused)
def _rectify_third_party_directory(mismatches):
    """Rectifies the 'third_party/python_libs' directory state to reflect the
    current 'requirements.txt' file requirements. It takes a list of mismatches
    and corrects those mismatches by installing or uninstalling packages.

    Args:
        mismatches: dict(str, tuple(str|None, str|None)). Dictionary with the
            normalized library names as keys and a tuple as values. The 1st
            element of the tuple is the version string of the library required
            by the requirements.txt file while the 2nd element is the version
            string of the library currently installed in the
            'third_party/python_libs' directory. If the library doesn't exist,
            the corresponding tuple element will be None. For example, this
            dictionary signifies that 'requirements.txt' requires flask with
            version 1.0.1 while the 'third_party/python_libs' directory
            contains flask 1.1.1:
                {
                    flask: ('1.0.1', '1.1.1')
                }
    """
    # Handling 5 or more mismatches requires 5 or more individual `pip install`
    # commands, which is slower than just reinstalling all of the libraries
    # using `pip install -r requirements.txt`.
    if len(mismatches) >= 5:
        if os.path.isdir(common.THIRD_PARTY_PYTHON_LIBS_DIR):
            shutil.rmtree(common.THIRD_PARTY_PYTHON_LIBS_DIR)
        _reinstall_all_dependencies()
        return

    # The library is installed in the directory but is not listed in
    # requirements. We don't have functionality to remove a library cleanly,
    # and if we ignore the library, this might cause issues when pushing the
    # branch to develop as there might be possible hidden use cases of a
    # deleted library that the developer did not catch. The only way to
    # enforce the removal of a library is to clean out the folder and
    # reinstall everything from scratch.
    if any(required is None for required, _ in mismatches.values()):
        if os.path.isdir(common.THIRD_PARTY_PYTHON_LIBS_DIR):
            shutil.rmtree(common.THIRD_PARTY_PYTHON_LIBS_DIR)
        _reinstall_all_dependencies()
        return

    git_mismatches, pip_mismatches = (
        utils.partition(mismatches.items(), predicate=_is_git_url_mismatch))

    for normalized_library_name, versions in git_mismatches:
        requirements_version, directory_version = versions
        # The library listed in 'requirements.txt' is not in the
        # 'third_party/python_libs' directory.
        if not directory_version or requirements_version != directory_version:
            _install_direct_url(normalized_library_name, requirements_version)

    for normalized_library_name, versions in pip_mismatches:
        requirements_version = (
            pkg_resources.parse_version(versions[0]) if versions[0] else None)
        directory_version = (
            pkg_resources.parse_version(versions[1]) if versions[1] else None)

        # The library listed in 'requirements.txt' is not in the
        # 'third_party/python_libs' directory.
        if not directory_version:
            _install_library(
                normalized_library_name,
                python_utils.convert_to_bytes(requirements_version))
        # The currently installed library version is not equal to the required
        # 'requirements.txt' version.
        elif requirements_version != directory_version:
            _install_library(
                normalized_library_name,
                python_utils.convert_to_bytes(requirements_version))
            _remove_metadata(
                normalized_library_name,
                python_utils.convert_to_bytes(directory_version))
def partition_type(typ, lst):
    """Split an event list into two lists based on event.type."""
    return partition(lambda event: event.type == typ, lst)
def load_qs(self, qs_loc=None):
    with open(qs_loc or self.qs_loc, 'r') as f:
        qlns = f.read().splitlines()
    sections, qs = ut.partition(lambda s: not s.startswith(':'), qlns)
    self.qs = map(str.split, qs)
def build_choices(g, items, candidates, players, num, akaris, shared):
    from thb.item import ImperialChoice
    from thb.characters.baseclasses import Character

    # ANCHOR(test)
    # ----- testing -----
    all_characters = Character.character_classes
    testing = list(all_characters[i] for i in settings.TESTING_CHARACTERS)
    candidates, _ = partition(lambda c: c not in testing, candidates)

    if g.SERVER_SIDE:
        candidates = list(candidates)
        g.random.shuffle(candidates)
    else:
        candidates = [None] * len(candidates)

    if shared:
        entities = ['shared']
        num = [num]
        akaris = [akaris]
    else:
        entities = players

    assert len(num) == len(akaris) == len(entities), 'Uneven configuration'
    assert sum(num) <= len(candidates) + len(testing), 'Insufficient choices'

    result = defaultdict(list)

    entities_for_testing = entities[:]
    candidates = list(candidates)
    seed = get_seed_for(g.players)
    shuffler = random.Random(seed)
    shuffler.shuffle(entities_for_testing)

    for e, cls in zip(cycle(entities_for_testing), testing):
        result[e].append(CharChoice(cls))

    # ----- imperial (force chosen by ImperialChoice) -----
    imperial = ImperialChoice.get_chosen(items, players)
    imperial = [(p, CharChoice(cls)) for p, cls in imperial]

    for p, c in imperial:
        result['shared' if shared else p].append(c)

    # ----- normal -----
    for e, n in zip(entities, num):
        for _ in xrange(len(result[e]), n):
            result[e].append(CharChoice(candidates.pop()))

    # ----- akaris -----
    if g.SERVER_SIDE:
        rest = candidates
    else:
        rest = [None] * len(candidates)
    g.random.shuffle(rest)

    for e, n in zip(entities, akaris):
        for i in xrange(-n, 0):
            result[e][i].set(rest.pop(), True)

    # ----- compose final result, reveal, and return -----
    if shared:
        result = OrderedDict([(p, result['shared']) for p in players])
    else:
        result = OrderedDict([(p, result[p]) for p in players])

    for p, l in result.items():
        p.reveal(l)

    return result, imperial
print "Load data" npz = np.load(args.in_npz) imgs_tr_np = npz['imgs_tr_np'] tsne = npz['tsne'] pts = npz['pts'] infos = npz['infos'] label_set = set(i['l'] for i in infos if i['src'] == "dataset") trans_set = set(i['tr'] for i in infos if i['src'] != "dataset") label_max = max(label_set) tr_map = { k:label_max+1+i for i, k in enumerate(trans_set) } print "Filtering data" Xn, Xd = partition(lambda y: y[1]['src'] == "dataset", np.array([pts, infos]).T) pts = None; infos = None # free memory Y2 = filter(lambda y: args.filter in y[1]['src'], Xn) pts2 = np.array([ y[0] for y in np.concatenate((Xd, Y2)) ]) colors = np.fromiter(( i['l'] if i['l'] >= 0 else tr_map[i['tr']] for [pt, i] in np.concatenate((Xd, Y2)) ), dtype=np.uint8) # add one annotated point per class ds_annotates = set() for [pt, info] in Xd: label = info['l'] if label not in ds_annotates: Y2.append([ pt, info ]) ds_annotates.add(label) if ds_annotates == label_set: break
def receive(self, packet):
    """This function is long and complicated. This should be taken as proof
    that it works, as the AX12 is also complicated.

    This function simulates the receiving of a packet by the AX12 object that
    it is called on. To ensure accurate simulation, this method may take some
    time to return to the caller. As such it is likely that the caller will
    want to call this function inside of a separate thread to reflect the
    fact that each AX12 has its own processor.

    Params:
        packet - A list of byte values containing the raw data of an ax12
            instruction packet. See the ax12 manual for a description of
            this format.

    Returns:
        Either a status packet (in byte form) that should be sent back to the
        'controller' or None if no status packet is required.
    """
    status = None
    status_return_level = self.get_status_return()
    inst_pack = InstPacket.deserialize(packet)

    if inst_pack.checksum != inst_pack.calc_checksum():
        raise AX12ChecksumException('Bad checksum on received packet')
    if inst_pack.dyn_id != self.dyn_id and inst_pack.dyn_id != const.ID_BROADCAST:
        # Format the ids explicitly; the original concatenated integer ids
        # onto strings, which would raise a TypeError.
        raise AX12Error('Instruction packet delivered to motor %s instead of '
                        'motor %s' % (self.dyn_id, inst_pack.dyn_id))

    # Determine instruction and process it
    if inst_pack.instruction == const.INST_PING:
        # Pings always and only return a status
        status = StatusPacket(self.dyn_id, 0, [])

    elif inst_pack.instruction == const.INST_READDATA:
        params = inst_pack.params
        start_addr = params[0]
        data_len = params[1]
        read_bytes = []
        for offset in range(data_len):
            read_bytes.append(self.get_byte(start_addr + offset))
        # Return the read data if applicable
        if status_return_level != const.SRL_NONE:
            status = StatusPacket(self.dyn_id, 0, read_bytes)

    elif inst_pack.instruction == const.INST_WRITEDATA:
        # Write bytes to memory and return a status if necessary
        params = inst_pack.params
        start_addr = params[0]
        self.set_bytes(start_addr, params[1:])
        if status_return_level == const.SRL_ALL:
            status = StatusPacket(self.dyn_id, 0, [])

    elif inst_pack.instruction == const.INST_REGWRITE:
        # ASSUME: Overwrite REGWRITE buffer if new REGWRITE
        # instruction occurs before an ACTION instruction.
        self.set_byte(const.ADDR_REGISTER, 1)
        self.regwrite_buffer = inst_pack.params
        if status_return_level == const.SRL_ALL:
            status = StatusPacket(self.dyn_id, 0, [])

    elif inst_pack.instruction == const.INST_ACTION:
        params = self.regwrite_buffer
        start_addr = params[0]
        self.set_bytes(start_addr, params[1:])
        self.set_byte(const.ADDR_REGISTER, 0)
        if status_return_level == const.SRL_ALL:
            status = StatusPacket(self.dyn_id, 0, [])

    elif inst_pack.instruction == const.INST_RESET:
        # Set all bytes to default and resolve references again
        for byte in self.mem:
            byte.reset()
        self._resolve_refs()
        if status_return_level == const.SRL_ALL:
            status = StatusPacket(self.dyn_id, 0, [])

    elif inst_pack.instruction == const.INST_SYNCWRITE:
        params = inst_pack.params
        start_addr = params[0]
        single_length = params[1]
        # Find parameters for this dynamixel
        for my_params in partition(params[2:], single_length):
            if my_params[0] == self.dyn_id:
                # The params are for me. Write the data!
                self.set_bytes(start_addr, my_params[1:])
        if status_return_level == const.SRL_ALL:
            status = StatusPacket(self.dyn_id, 0, [])

    else:
        raise AX12Exception('Bad instruction received')

    # Return a status if it is required
    if status is not None and inst_pack.dyn_id != const.ID_BROADCAST:
        # Sleep in microseconds
        time.sleep(self.get_delay_time() / 1000000)
        return status.serialize()
    else:
        return None