def deep_cite():
    """Handle a citation request and answer with a JSON document.

    Reads ``claim`` and ``link`` from the JSON body of the current request,
    builds a ``Tree`` from them and returns its ``response_object`` under the
    ``results`` key. If the tree cannot be built, the response keeps a single
    placeholder result (the submitted claim and link) and carries the failure
    text in ``error``.

    Returns:
        The object produced by ``jsonify`` with an explicit UTF-8 JSON
        content type.
    """
    payload = request.get_json()
    try:
        claim = payload['claim']
        link = payload['link']
    except Exception:
        return jsonify(
            {'error': 'Error 505: claim or link cannot be gathered'})

    # Placeholder answer; 'results' is replaced below when the tree succeeds.
    body = {
        'error': 'none',
        'results': [{
            'citeID': str(uuid.uuid4()),
            'parentCiteID': 0,
            'link': link,
            'score': 1,
            'source': claim
        }]
    }

    try:
        body['results'] = Tree(link, claim).response_object
    except Exception as err:
        if check_instance(err):
            # check_instance() decides whether the message is shown verbatim.
            body['error'] = str(err)
        else:
            traceback.print_exc()
            issues_link = html_link(
                'https://github.com/connorjoleary/DeepCite/issues')
            prefix = 'Error 500: Internal Server Error ' + str(err) + "."
            body['error'] = str(prefix + new_indention(
                "Please add your error to " + issues_link
                + " with the corresponding claim and link."))

    response = jsonify(body)
    response.headers["Content-Type"] = "application/json; charset=utf-8"
    return response
def build_telenor():
    """Build the sample Telenor network hierarchy and return its root node.

    The root has two provider subtrees; each provider has one regional node
    which in turn holds two customer leaves.
    """

    def attach_provider(root, provider_host, regional_host, customers):
        # Create provider -> regional -> customers, then hang it under root.
        provider = Tree(provider_host)
        regional = Tree(regional_host)
        provider.add_child(regional)
        for customer in customers:
            regional.add_child(Tree(customer))
        root.add_child(provider)

    telenor = Tree("a8f.9.telenor.se")

    # Smalbandsbolaget
    attach_provider(
        telenor,
        "cce76-top.smalbandsbolaget.se",
        "ns.645-52-stockholm.smalbandsbolaget.se",
        ["Lisas router", "Familjen Larsson"],
    )

    # Judith's network
    attach_provider(
        telenor,
        "58.5-ce6.judith-och-judith.se",
        "a-8ed.sth-63.judith-och-judith.se",
        ["dlink-653C", "Kalle hem"],
    )

    return telenor
def convert_graph(data_dir):
    """Convert the graph files in ``data_dir`` into a logical-form file.

    Reads ``train.graph`` and ``valid.graph`` (one JSON object per line) and
    writes ``train_lf`` in the same directory. Each output line holds four
    tab-separated JSON values: the tokenised sentence, the answer, the list
    of "good" ``(logical form, denotation)`` pairs whose denotation shares at
    least one element with the answer, and the list of remaining "bad" pairs.

    Args:
        data_dir: Directory containing the input files; also receives the
            output file.
    """
    out_path = os.path.join(data_dir, 'train_lf')
    # The output handle used to be opened and never closed; the context
    # manager guarantees it is flushed and closed even if a line fails.
    with open(out_path, 'w') as rf:
        for fname in ('train.graph', 'valid.graph'):
            print('reading', fname)
            pname = os.path.join(data_dir, fname)
            with codecs.open(pname, 'r', 'utf-8') as f:
                for line in f:
                    record = json.loads(line)
                    sen = record['sentence'].split(' ')
                    forest, answer = record['forest'], record['answerF1']
                    good_lf = []
                    bad_lf = []
                    for choice in forest:
                        entity_list = choice['entities']
                        for graph in choice['graphs']:
                            lf = graph2lf(graph['graph'], entity_list)
                            # The parse result is not used; the calls are
                            # kept because they may raise on a malformed
                            # logical form (presumably a sanity check).
                            parse_tree = Tree()
                            parse_tree.construct_from_sexp(lf)
                            parse_tree.get_nt_ter()
                            if set(graph['denotation']) & set(answer):
                                good_lf.append((lf, graph['denotation']))
                            else:
                                bad_lf.append((lf, graph['denotation']))
                    json.dump(sen, rf)
                    rf.write('\t')
                    json.dump(answer, rf)
                    rf.write('\t')
                    json.dump(good_lf, rf)
                    rf.write('\t')
                    json.dump(bad_lf, rf)
                    rf.write('\n')
def solver(self, adjLists):
    """Build the word tree for the board and return placeholder solutions.

    Every lower-cased NLTK corpus word whose length is between 3 and
    ``boardSize * boardSize + 1`` is inserted into a ``Tree`` letter by
    letter, and the node of its final letter is flagged with ``endsWord``.
    The board walk itself is not implemented yet, so ``adjLists`` is unused
    and a fixed list of three strings is returned.
    """
    # A set gives de-duplicated words and fast membership.
    vocabulary = {word.lower() for word in nltk.corpus.words.words()}

    # Each cell can be visited once; the extra character allows for "QU".
    longest = (self.boardSize * self.boardSize) + 1

    tree = Tree()
    for word in vocabulary:
        if len(word) < 3 or len(word) > longest:
            continue
        node = tree.addNode(Node(word[0]))
        tail = None
        for letter in word[1:]:
            node = tree.addNode(Node(letter), node)
            tail = node
        # Mark the end of a word for the (future) board walk.
        if tail is not None:
            tail.endsWord = True

    # TODO: walk the word tree with the letters on the board, building up
    # strings by adjacency, e.g.
    #   for idx, val in np.ndenumerate(self.currentLetters):
    #       self.findStrings(idx, tree.root.children[0], visited)

    # Just returning something for now so the front end has data.
    return ['solution1', 'solution2', 'solution3']
def main():
    """Crawl the configured Boots categories and export them to CSV.

    Loads the ``Boots`` section of ``ms_config``, seeds a ``Tree`` with the
    configured categories and links, then walks every link: each path segment
    becomes a category (created and linked to the previous segment when it
    does not exist yet), and when the last segment was newly created the page
    is fetched and its products are attached to that category.
    """
    config = read_json("ms_config")["Boots"]
    web_tree = Tree()
    create_categories(web_tree, config)
    assing_hrefs(web_tree, config)

    for category_name in config["categories"].keys():
        category = web_tree.get_category_by_name(category_name)
        print(category.get_name())

        for link in category.get_href(single=False):
            path_parts = re.sub("https://www.boots.com/", "", link).split("/")
            last_position = len(path_parts) - 1

            for position, part in enumerate(path_parts):
                # Known segment: just remember it as the parent and move on.
                if web_tree.get_category_by_name(part):
                    parent_category = web_tree.get_category_by_name(part)
                    print(f"Parent: {part}")
                    continue

                # Unknown segment: create it and link it under the parent.
                # NOTE(review): parent_category is unbound if the very first
                # segment ever seen is unknown - confirm this cannot happen.
                print(f"Created: {part}")
                created = Category(name=part)
                web_tree.add_category(created)
                web_tree.create_edge(parent_category, created)
                parent_category = created

                # Only the leaf (last segment) carries product details.
                if position != last_position:
                    continue

                print(f"{link}----------------------------------------------------------")
                try:
                    soup = Webpage.get_source_code(link)
                except Exception as ex:
                    print(f"Error - link ************************** {link} - {ex}")
                    continue

                containers = Webpage.get_element(
                    soup, "div", "class", "estore_product_container")
                for container in containers:
                    product = get_product_details(container)
                    if product:
                        parent_category.add_product(product)
                        print(product)

    write_to_csv("boots", web_tree, config)
def __init__(self, board, algorithm):
    """Set up the puzzle state and optionally run a search algorithm.

    Args:
        board: Raw board description passed to ``createPieceBoard``.
        algorithm: ``0`` only builds the state; ``1``-``7`` select breadth
            first, depth first, depth limited, progressive deepening,
            uniform cost, greedy and A* search respectively. Any other
            non-zero value runs no search but still reports timing and
            reads the tree's solution.
    """
    self.board = []
    self.blocks = []
    self.solved = False
    self.solution = []

    self.createPieceBoard(board)
    self.createBlocks()
    self.tree = Tree(self)

    if algorithm == 0:
        return

    started = time.time()
    if algorithm == 1:
        self.tree.breadthFirst(self)
    elif algorithm == 2:
        self.tree.depthFirst(self)
    elif algorithm == 3:
        print("Insert the desired max depth.")
        self.tree.limitedDepthSearch(self, self.userInputNumber())
    elif algorithm == 4:
        print("Insert the desired depth.")
        self.tree.progressiveDeepening(self, self.userInputNumber())
    elif algorithm == 5:
        self.tree.uniform_cost_search(self)
    elif algorithm == 6:
        self.tree.greedy([], self)
    elif algorithm == 7:
        self.tree.a_star(self)
    finished = time.time()

    print("Time elapsed: ", round(finished - started, 3), "s", sep="")
    self.solution = self.tree.solution()
    # The solution list includes the start state, hence the minus one.
    print("Number of moves: ", len(self.solution) - 1, sep="")
def get_bionj_tree(
    self,
    model=None,
    datatype=None,
    ncat=1,
    optimise='n',
    tmpdir='/tmp',
    overwrite=True,
    verbose=False,
    ):
    """Run bionj on this record's alignment and return ``self.tree``.

    The alignment is written to a temporary phylip file in ``tmpdir``, which
    is now removed on every exit path. Previously it was left behind when the
    datatype was unknown or when ``run_bionj`` raised.

    Args:
        model: Substitution model; chosen from ``self.datatype`` when both
            ``model`` and ``datatype`` are omitted ('GTR' for dna, 'WAG' for
            protein).
        datatype: 'nt' or 'aa'; chosen together with ``model``.
        ncat, optimise, overwrite, verbose: Forwarded to ``run_bionj``.
        tmpdir: Directory for the temporary phylip file.

    Returns:
        ``self.tree``, or ``None`` when no model/datatype was given and
        ``self.datatype`` is neither 'dna' nor 'protein'.
    """
    if not overwrite and self.tree.newick:
        print('{0}: Tree exists and overwrite set to false'.format(self.name))
        return self.tree

    # NOTE(review): this sets ``self.Tree`` (capital T) while the rest of the
    # method uses ``self.tree`` - looks like a typo, left unchanged; confirm.
    self.Tree = Tree()

    filename = self._write_temp_phylip(tmpdir=tmpdir, use_hashname=True)
    input_file = '{0}/{1}.phy'.format(tmpdir, filename)
    try:
        print('Running bionj on ' + str(self.name) + '...')
        if not model and not datatype:
            # Quick fix to allow specification of other models when calling
            # phyml: only fall back to defaults when nothing was specified.
            if self.datatype == 'dna':
                model = 'GTR'
                datatype = 'nt'
            elif self.datatype == 'protein':
                model = 'WAG'
                datatype = 'aa'
            else:
                print('I don\'t know this datatype: {0}'.format(self.datatype))
                return
        # The return value was never used by the original code either.
        self.tree.run_bionj(
            model,
            input_file,
            datatype,
            ncat=ncat,
            name=self.name,
            optimise=optimise,
            overwrite=overwrite,
            verbose=verbose,
            )
    finally:
        # Always clean up the temporary alignment file.
        if os.path.exists(input_file):
            os.remove(input_file)
    return self.tree
def main():
    """Build a nine-node sample binary tree and print whether it is balanced."""
    t = Tree()
    n14, n11, n16, n10, n12, n15, n17, n13, n18 = [
        t.make_node(value)
        for value in (14, 11, 16, 10, 12, 15, 17, 13, 18)
    ]

    # Wire up the shape: 14 -> (11, 16), 11 -> (10, 12), 16 -> (15, 17),
    # with 13 and 18 as right children of 12 and 17.
    n14.lchild, n14.rchild = n11, n16
    n11.lchild, n11.rchild = n10, n12
    n16.lchild, n16.rchild = n15, n17
    n12.rchild = n13
    n17.rchild = n18

    print(bool(is_balanced(t.root)))
def main():
    """Build a seven-node sample binary tree and print the LCA of 4 and 6."""
    t = Tree()
    node = {value: t.make_node(value) for value in range(1, 8)}

    # Complete tree: 1 -> (2, 3), 2 -> (4, 5), 3 -> (6, 7).
    node[1].lchild, node[1].rchild = node[2], node[3]
    node[2].lchild, node[2].rchild = node[4], node[5]
    node[3].lchild, node[3].rchild = node[6], node[7]

    first, second = node[4], node[6]
    print("LCA of {} and {} : {}".format(
        first.data, second.data, find_lca(t.root, first, second)))
def main():
    """Build an eight-node sample binary tree and print its level lists."""
    t = Tree()
    n14, n11, n16, n10, n12, n15, n17, n13 = [
        t.make_node(value)
        for value in (14, 11, 16, 10, 12, 15, 17, 13)
    ]

    # Shape: 14 -> (11, 16), 11 -> (10, 12), 16 -> (15, 17), 12 -> (-, 13).
    n14.lchild, n14.rchild = n11, n16
    n11.lchild, n11.rchild = n10, n12
    n16.lchild, n16.rchild = n15, n17
    n12.rchild = n13

    levels = []
    make_level_list(t.root, levels)  # fills ``levels`` in place
    print(levels)
def test_find_interval(self):
    """lookup() reports the expected truthiness for a set of query spans."""
    root = Node(20, 25)
    others = [
        Node(4, 5),
        Node(26, 30),
        Node(3, 4),
        Node(9, 10),
        Node(15, 20),
    ]
    tree = Tree(root)
    for node in others:
        tree.insert(node)

    expectations = [
        ((4, 10), False),
        ((25, 26), True),
        ((9, 20), False),
        ((0, 25), False),
        ((30, 40), True),
    ]
    for (low, high), expected in expectations:
        check = self.assertTrue if expected else self.assertFalse
        check(tree.lookup(low, high))
def t_H(t):
    # Lexer rule for a section heading token.
    #
    # NOTE: deliberately documented with comments rather than a docstring.
    # This looks like a PLY-style token rule, and such lexers take the rule's
    # regular expression from the function's __doc__ - confirm before adding
    # a docstring here.
    #
    # The last character of the match is the underline character; its
    # position in '=-~*^' gives the heading level (1-5). The token value is a
    # Tree named 'sect<level>' carrying title, level and name.
    match = t.lexer.lexmatch
    matched_text = match.group(0)
    t.lexer.lineno += matched_text.count('\n')

    underline = matched_text[-1]
    level = '=-~*^'.index(underline) + 1

    title = match.group('title')
    name = match.group('name')
    oname = match.group('oname')

    t.type = 'H%s' % level
    section = Tree('sect%s' % level, title)
    t.value = section
    section.title = title
    section.level = level
    # 'name' wins over 'oname' when both are present.
    section.name = oname
    if name:
        section.name = name
    # Untitled sections fall back to their name.
    if not section.title:
        section.title = section.name
    return t
def main():
    """Main entry point for the prediction script.

    For every sample folder found under the input directory, reads the
    sample's ``.out`` file, scores it against a freshly loaded haplogroup
    tree and collects one row; the rows are then written to the output file.
    """
    # The per-sample score cache lives at module level.
    global QC1_SCORE_CACHE

    print("\tY-Haplogroup Prediction")
    args = get_arguments()
    in_folder = args.input
    output = args.outfile
    read_backbone_groups()

    final_table = []
    for sample_dir in read_input_folder(in_folder):
        # Make sure to reset this for each sample.
        QC1_SCORE_CACHE = {}
        haplotype_dict = read_yleaf_out_file(
            sample_dir / (sample_dir.name + ".out"))
        tree = Tree("Hg_Prediction_tables/tree.json")
        best_haplotype_score = get_most_likely_haplotype(
            tree, haplotype_dict, args.minimum_score)
        add_to_final_table(
            final_table, haplotype_dict, best_haplotype_score, sample_dir)

    write_final_table(final_table, output)
    print("--- Yleaf 'Y-Haplogroup prediction' finished... ---")
def fit(self, X, y, depth=0, max_depth=2):
    """Recursively grow a decision tree and return the subtree for (X, y).

    The depth limit is checked before any work is done. Previously the split
    search (and a leftover debug print) ran even for calls that immediately
    returned ``None``, i.e. once for every node position at the depth limit.

    Args:
        X: Feature data for this node, passed to ``self.split``.
        y: Targets for this node, passed to ``self.split``.
        depth: Depth of the node being built; the root is 0.
        max_depth: Depth at which recursion stops.

    Returns:
        The ``Tree`` node built for this data, or ``None`` when
        ``depth >= max_depth``. The root node is also stored in
        ``self.tree``.
    """
    if depth >= max_depth:
        return None

    value, column, _loss, leftX, rightX, lefty, righty = self.split(X, y)

    t = Tree(value, column)
    if depth == 0:
        self.tree = t
    t.left_tree = self.fit(leftX, lefty, depth=depth + 1,
                           max_depth=max_depth)
    t.right_tree = self.fit(rightX, righty, depth=depth + 1,
                            max_depth=max_depth)
    return t
def create_trees(sentence, deprel2idx):
    """Build one ``Tree`` node per token and link them by head index.

    Args:
        sentence: Sequence of token rows; column 4 holds the token id,
            column 8 is passed through to ``Tree``, column 10 holds the head
            id and column 11 the dependency relation label.
        deprel2idx: Mapping from relation label to index; unknown labels fall
            back to the ``_UNK_`` entry.

    Returns:
        ``(trees, roots)`` where ``trees`` maps token id to node and
        ``roots`` maps the row position of each token whose head id is 0 to
        its node.
    """
    ids = [int(row[4]) for row in sentence]
    parents = [int(row[10]) for row in sentence]
    trees = {}
    roots = {}

    # First pass: create every node so heads can be resolved in any order.
    for position, (token_id, row) in enumerate(zip(ids, sentence)):
        relation = deprel2idx.get(row[11], deprel2idx[_UNK_])
        trees[token_id] = Tree(position, relation, row[8])

    # Second pass: attach each node to its head, or record it as a root.
    for position, (token_id, head_id) in enumerate(zip(ids, parents)):
        if head_id == 0:
            roots[position] = trees[token_id]
        else:
            trees[head_id].add_child(trees[token_id])

    return trees, roots
def choose_root():
    """Pick the best attribute as the tree root and push all instances into it.

    Works entirely on module-level state: resets ``blocked_attrs``, stores the
    new root in ``tree_root``, feeds every entry of ``current_instances`` to
    the root and then empties ``current_instances``.
    """
    global blocked_attrs, tree_root, current_instances, attribute_value_dict

    # Reset before scoring so no attribute is excluded for the root choice.
    blocked_attrs = []

    # Attribute with the best score over all instances becomes the root.
    root_attr = find_best_attr(instances)
    tree_root = Tree(
        attribute_name=root_attr,
        classes_num=[0, 0],
        childs_value=attribute_value_dict[root_attr],
        is_attr=True,
    )

    # The root attribute must not be chosen again further down.
    blocked_attrs.append(root_attr)

    # Hand the current data to the tree; the returned pair is not needed here.
    for instance in current_instances:
        _changed, _node = tree_root.add_instance(
            instance, attribute_value_dict)
    current_instances = []
def parseWhileStatement(tokensAndTypes):
    """Parse ``<expression> do <statement> endwhile`` into a WHILE-LOOP tree.

    Consumes tokens from the front of ``tokensAndTypes`` (mutating the list).
    Running out of tokens where a keyword is expected is now reported with
    the same "Missing ..." error as a wrong keyword, instead of an
    ``IndexError`` from indexing an empty list.

    Args:
        tokensAndTypes: List of ``(token, type)`` pairs still to be parsed.

    Returns:
        A ``Tree`` with the condition as first child and the loop body as
        last child.

    Raises:
        Exception: If ``do`` or ``endwhile`` is missing.
    """
    condition = parseExpression(tokensAndTypes)

    # The expression must be followed by 'do'.
    if not tokensAndTypes or tokensAndTypes[0][0] != "do":
        raise Exception("Missing 'do'")
    tokensAndTypes.pop(0)  # consume 'do'

    body = parseStatement(tokensAndTypes)
    tree = Tree("WHILE-LOOP", "KEYWORD", condition, None, body)

    # The body must be closed by 'endwhile'.
    if not tokensAndTypes or tokensAndTypes[0][0] != "endwhile":
        raise Exception("Missing 'endwhile'")
    tokensAndTypes.pop(0)  # consume 'endwhile'

    return tree
def all_matches(cls, criteria, pattern, enclosing_session=None):
    """Yield ``(article, index, match)`` for every pattern match.

    Iterates over the articles selected by ``criteria``, loads each
    article's tree and asks every simple tree in it (see matcher.py) for all
    matches of ``pattern``. Runs inside a read-only session context, reusing
    ``enclosing_session`` when one is given.
    """
    with SessionContext(
        commit=True, read_only=True, session=enclosing_session
    ) as session:
        for article in cls.articles(criteria, enclosing_session=session):
            tree = Tree(url=article.url, authority=article.authority)
            tree.load(article.tree)
            for ix, simple_tree in tree.simple_trees():
                for match in simple_tree.all_matches(pattern):
                    yield (article, ix, match)
def changelog_from_spec(spec):
    """Render the spec's changelog as a ``debian/changelog`` file.

    Each RPM changelog entry becomes one Debian entry. Changes over the
    previous version of this function:

    * the author/version pattern is a raw string, so ``\\S`` no longer
      triggers an invalid-escape warning;
    * change lines are indented by two spaces and the maintainer and date in
      the trailer are separated by two spaces, as the Debian changelog
      format requires (the code as received used a single space in both);
    * the text is assembled with ``join`` instead of repeated ``+=``.

    Args:
        spec: Parsed spec object exposing ``sourceHeader`` with the
            ``changelogname``, ``changelogtime``, ``changelogtext``,
            ``name``, ``version`` and ``release`` entries.

    Returns:
        A ``Tree`` containing the single file ``debian/changelog``.
    """
    res = Tree()
    hdr = spec.sourceHeader
    entries = []
    for (name, timestamp, text) in zip(hdr['changelogname'],
                                       hdr['changelogtime'],
                                       hdr['changelogtext']):
        # A Debian package's version is defined by the version of the
        # first entry in the changelog, so we must get this right.
        # Most spec files have changelog entries starting
        # "First Last <email> - version" - this seems to be the standard
        # for Red Hat spec files.
        # Some changelogs only have "First Last <email>"; for these, the
        # version from the spec is used.
        match = re.match(r"^(.+) - (\S+)$", name)
        if match:
            author = match.group(1)
            version = match.group(2)
        else:
            author = name
            version = "%s-%s" % (spec.sourceHeader['version'],
                                 spec.sourceHeader['release'])

        package_name = mappkgname.map_package(hdr['name'])[0]

        # RPM bullets ("-") become Debian bullets ("*"), indented two spaces.
        body = re.sub("^-", "*", text, flags=re.MULTILINE)
        body = re.sub("^", "  ", body, flags=re.MULTILINE)

        date_string = time.strftime("%a, %d %b %Y %H:%M:%S %z",
                                    time.gmtime(int(timestamp)))

        entries.append(
            "%s (%s) UNRELEASED; urgency=low\n"
            "\n"
            "%s\n"
            "\n"
            " -- %s  %s\n"
            "\n" % (package_name, version, body, author, date_string))

    res.append('debian/changelog', "".join(entries))
    return res
def debinarize(tree):
    """Undo binarization by splicing out nodes whose label contains ``'_'``.

    A node whose label contains ``'_'`` is an artificial node added by
    binarization: it is replaced by the (debinarized) list of its children.
    Any other node keeps its label and span and receives the flattened
    children. Each child is now debinarized exactly once; the previous code
    debinarized every regular child twice, which made the work grow
    exponentially with tree depth.

    Args:
        tree: Node with ``label``, ``span`` and ``subs`` (list of children,
            or ``None`` for a leaf).

    Returns:
        A list of nodes (a forest) when ``tree`` itself is an artificial
        node; ``tree`` itself when it is a regular leaf; otherwise a new
        ``Tree`` with the flattened children.
    """
    is_artificial = '_' in tree.label

    # A regular leaf is returned unchanged.
    if not is_artificial and tree.subs is None:
        return tree

    flattened = []
    for sub in tree.subs:
        result = debinarize(sub)
        # Artificial children come back as a forest and are spliced in
        # place. Exact type check on purpose: only the plain lists built
        # here count as forests.
        if type(result) is list:
            flattened += result
        else:
            flattened.append(result)

    if is_artificial:
        # This is a forest; the caller splices it into its own children.
        return flattened
    return Tree(tree.label, tree.span, subs=flattened)
def fit(self, max_tree, seed=42):
    """Fit ``max_tree`` boosted trees, updating predictions after each one.

    Every round samples a row mask and a feature mask, grows one tree from a
    fresh root, appends it to ``self.forest`` and refreshes ``y_tilde``,
    ``g_tilde`` and ``h_tilde`` for all training rows. When test data is
    present the test RMSE is printed after each round.

    The unreachable ``else: next`` branch was removed: the root is created
    just before the check so it can never be ``None``, and ``next`` was a
    bare reference to the builtin (a no-op), not ``continue``.

    Args:
        max_tree: Number of trees to fit.
        seed: Seed passed to ``np.random.seed`` before fitting.
    """
    np.random.seed(seed)
    self.forest = []
    i = 0
    while i < max_tree:
        # Instance-wise stochasticity: each row joins with prob. sub_sample.
        x_in_node = np.random.choice(
            [True, False], self.n,
            p=[self.sub_sample, 1 - self.sub_sample])

        # Feature-wise stochasticity: nf distinct features per tree.
        chosen = np.random.choice(range(self.m), self.nf, replace=False)
        f_in_tree = np.zeros(self.m, dtype=bool)
        f_in_tree[chosen] = True

        # Initialize the root of this tree and grow the tree from it.
        root = Tree(None, None, None, None, None)
        grow_tree(root, f_in_tree, x_in_node, self.depth - 1,
                  self.x_val_sorted, self.x_index_sorted, self.y_train,
                  self.g_tilde, self.h_tilde, self.eta, self.lam,
                  self.gamma, self.min_instances)

        i += 1
        self.forest.append(root)

        # Add the new tree's contribution and refresh the g/h statistics.
        for j in range(self.n):
            self.y_tilde[j] += self.forest[-1]._predict_single(
                self.x_train[j])
            self.g_tilde[j], self.h_tilde[j] = gh_lm(
                self.y_train[j], self.y_tilde[j])

        if self.x_test is not None:
            # Test on the testing instances.
            y_hat = self.predict(self.x_test)
            print("iter: {0:>4} rmse: {1:1.6f}".format(
                i, rmse(self.y_test, y_hat)))
def _deserialize(self, stream):
    """Populate this commit from a raw commit data stream and return self.

    The stream is consumed strictly in order: one tree line, zero or more
    ``parent`` lines, the author line, the committer line, an optional
    encoding line followed by a separator line, and finally the message
    (everything that remains).

    :param stream: file-like object providing ``readline()`` and ``read()``
    :return: self"""
    readline = stream.readline
    # First line: second whitespace-separated field is the tree's hex sha.
    self.tree = Tree(self.repo, hex_to_bin(readline().split()[1]),
                     Tree.tree_id << 12, '')

    self.parents = list()
    next_line = None
    while True:
        parent_line = readline()
        if not parent_line.startswith('parent'):
            # First non-parent line is the author line; keep it for below.
            next_line = parent_line
            break
        # END abort reading parents
        # Parents are created as objects of this same class.
        self.parents.append(
            type(self)(self.repo, hex_to_bin(parent_line.split()[-1])))
    # END for each parent line
    self.parents = tuple(self.parents)

    self.author, self.authored_date, self.author_tz_offset = parse_actor_and_date(
        next_line)
    self.committer, self.committed_date, self.committer_tz_offset = parse_actor_and_date(
        readline())

    # now we can have the encoding line, or an empty line followed by the optional
    # message.
    self.encoding = self.default_encoding
    # read encoding or empty line to separate message
    enc = readline()
    enc = enc.strip()
    if enc:
        # Everything after the first space is the encoding name.
        self.encoding = enc[enc.find(' ') + 1:]
        # now comes the message separator
        readline()
    # END handle encoding

    # a stream from our data simply gives us the plain message
    # NOTE(review): the message is stored exactly as read; no trailing
    # newline is stripped here - confirm whether callers expect stripping.
    self.message = stream.read()
    return self
def main(arguments):
    """Build the ImageNet plant tree, count images per node and pickle it.

    The output file is now written inside a ``with`` block so it is flushed
    and closed; previously the handle was never closed. Print calls use the
    single-argument parenthesised form, which behaves the same on Python 2
    and 3.

    Args:
        arguments: ``argv``-style list; ``arguments[1]`` is the file to
            pickle the tree into and ``arguments[2]`` is the directory that
            holds one sub-directory of images per node id.
    """
    if len(arguments) < 3:
        print('The argument 1 should be a filename to pickle tree contents into.')
        print('The argument 2 should be the directory where the images can be found.')
        return

    image_root = arguments[2]
    imagenet_tree = Tree('Image Net Tree')

    # Add our selected parent nodes.
    imagenet_tree.addNode('n00017222')  # Plant flora root node
    imagenet_tree.addNode('n13083586', 'n00017222')  # Vascular root

    # Nodes we need to build the tree for, in the form node: parent.
    node_dictionary = {
        'n13134302': 'n13083586',  # Vascular child: Bulbous
        'n13121544': 'n13083586',  # Vascular child: Aquatic
        'n13121104': 'n13083586',  # Vascular child: Desert
        'n13103136': 'n13083586',  # Vascular child: Woody plants
        'n13100677': 'n13083586',  # Vascular child: Vines
        'n13085113': 'n13083586',  # Vascular child: Weed
        'n13084184': 'n13083586',  # Vascular child: Succulent
        'n12205694': 'n13083586',  # Vascular child: Herb
        'n11552386': 'n13083586',  # Vascular child: Spermatophyte
        'n11545524': 'n13083586',  # Vascular child: Pteridophyte
        'n13100156': 'n00017222',  # House plant root
        'n13083023': 'n00017222',  # Poisonous root
    }

    for node in node_dictionary:
        print('Processing %s' % (node))
        node_dir = image_root + '/' + node
        number_of_images = 0
        if os.path.isdir(node_dir):
            # Count regular files whose name ends in '.JPEG'.
            number_of_images = len([
                name for name in os.listdir(node_dir)
                if os.path.isfile(node_dir + '/' + name)
                and name[-5:] == '.JPEG'
            ])
        imagenet_tree.addNode(node, node_dictionary[node], number_of_images)
        print('Images found for %s:%i' % (node, number_of_images))
        addAllNodes(imagenet_tree, node, image_root)

    with open(arguments[1], 'wb') as pickled_file:
        pickle.dump(imagenet_tree, pickled_file)
def prune_tree(root, left, right):
    """Remove all nodes that are (totally) out of the span (left, right).

    Children lying entirely inside the span are kept as they are; children
    that only overlap the span are pruned recursively; all others are
    dropped. A node left with a single child that itself has a single child
    is collapsed into that child, which takes over ``root.val``.
    """
    kept = []
    for child in root.children:
        starts_before = child.leftidx < left
        ends_after = child.rightidx > right

        # Entirely inside the span: keep untouched.
        if not starts_before and not ends_after:
            kept.append(child)
            continue

        if starts_before:
            # Covers the whole span, or sticks out on the left but still
            # reaches into it.
            overlaps = ends_after or child.rightidx > left
        else:
            # Sticks out on the right; must start before the span ends.
            overlaps = child.leftidx < right

        if overlaps:
            kept.append(prune_tree(child, left, right))

    # Aggregate all children, remove all consecutive unary rules.
    if len(kept) == 1 and len(kept[0].children) == 1:
        only_child = kept[0]
        only_child.val = root.val
        return only_child

    # NOTE(review): if no child survives, kept[0] below raises IndexError.
    pruned = Tree(val=root.val, children=kept)
    pruned.leftidx = kept[0].leftidx
    pruned.rightidx = kept[-1].rightidx
    return pruned
def __init__(self, in_features, num_trees, tree_depth, num_classes):
    """Create a forest of ``num_trees`` trees over ``in_features`` inputs.

    Each tree has ``2 ** (tree_depth - 1) - 1`` split nodes. For every tree,
    one input feature index per split node is drawn at random (with
    replacement, as ``np.random.choice`` does by default) and stored as a
    column of the ``feature_mask`` buffer.

    Args:
        in_features: Number of input features.
        num_trees: Number of trees in the forest.
        tree_depth: Depth passed to every ``Tree``.
        num_classes: Number of classes passed to every ``Tree``.
    """
    super(Forest, self).__init__()

    self.in_features = in_features
    self.num_trees = num_trees
    self.tree_depth = tree_depth
    self.num_classes = num_classes

    self.num_split_per_tree = 2 ** (self.tree_depth - 1) - 1
    assert self.num_split_per_tree <= self.in_features

    # Integer buffer: one column of feature indices per tree.
    empty_mask = torch.zeros(self.num_split_per_tree, self.num_trees).long()
    self.register_buffer('feature_mask', empty_mask)

    self.linear = nn.Linear(in_features, in_features, bias=False)

    self.trees = nn.ModuleList()
    for tree_index in range(self.num_trees):
        picked = np.random.choice(self.in_features, self.num_split_per_tree)
        self.feature_mask[:, tree_index] = torch.from_numpy(picked)
        self.trees.append(Tree(tree_depth, num_classes))
def test_double_rotation(self):
    """Inserting the six sample nodes yields the expected tree shape."""
    a = Node(20, 25)
    b = Node(4, 5)
    c = Node(26, 30)
    d = Node(3, 4)
    e = Node(9, 10)
    f = Node(15, 20)

    tree = Tree(a)
    for node in (b, c, d, e, f):
        tree.insert(node)

    # Expected shape, as attribute path from the tree -> expected node.
    expected_shape = [
        ('root', e),
        ('root.left', b),
        ('root.right', a),
        ('root.left.left', d),
        ('root.right.left', f),
        ('root.right.right', c),
    ]
    for path, expected in expected_shape:
        # Resolve the path lazily so earlier assertions run first.
        actual = tree
        for attribute in path.split('.'):
            actual = getattr(actual, attribute)
        self.assertEqual(actual, expected)
def parseIfStatement(tokensAndTypes):
    """Parse ``<expression> then <statement> else <statement>``.

    Consumes tokens from the front of ``tokensAndTypes`` (mutating the list).
    Running out of tokens where a keyword is expected is now reported with
    the same "Missing ..." error as a wrong keyword, instead of an
    ``IndexError`` from indexing an empty list.

    Args:
        tokensAndTypes: List of ``(token, type)`` pairs still to be parsed.

    Returns:
        A ``Tree`` holding the condition, the ``then`` statement (middle
        node) and the ``else`` statement.

    Raises:
        Exception: If ``then`` or ``else`` is missing.
    """
    condition = parseExpression(tokensAndTypes)

    # The expression must be followed by 'then'.
    if not tokensAndTypes or tokensAndTypes[0][0] != "then":
        raise Exception("Missing 'then'")
    tokensAndTypes.pop(0)  # consume 'then'

    # First statement, for the middle node of the tree.
    statement1 = parseStatement(tokensAndTypes)

    # The first statement must be followed by 'else'.
    if not tokensAndTypes or tokensAndTypes[0][0] != "else":
        raise Exception("Missing 'else'")
    tokensAndTypes.pop(0)  # consume 'else'

    return Tree("IF-STATEMENT", "KEYWORD", condition, statement1,
                parseStatement(tokensAndTypes))
def test_subtree_at():
    """subtree_at() returns the (depth, subtree) pair for a node index."""
    t = Tree("(mul (add 1 2) (sub (div 8 2) 4))")
    assert len(t) == 9

    # One past the last node index is out of range.
    with pytest.raises(IndexError):
        t.subtree_at(9)

    # (index, expected depth, expected text); None means "the whole tree".
    cases = [
        (4, 1, "(sub (div 8 2) 4)"),
        (0, 0, None),
        (5, 2, "(div 8 2)"),
        (7, 3, "2"),
    ]
    for index, expected_depth, expected_text in cases:
        depth, tree = t.subtree_at(index)
        assert depth == expected_depth
        if expected_text is None:
            expected_text = str(t)
        assert str(tree) == expected_text
def computer_move(self):
    """
    Play the computer's move and record it in the game state.

    The move is chosen by a ``Tree`` built from the current state and the
    last move. The chosen cell is removed from the free cells, the state is
    updated and ``last_move`` is replaced.

    :raises GameOver: when the chosen state is the last possible one
    :return: None
    """
    # Tree of possible next steps picks the follow-up state.
    chosen = Tree(self.state, self.last_move).choose_next_move()
    move = chosen.last_move
    cell = move[1]

    row, col = self.number_cell_to_state_indexes(cell)

    # The cell is taken now: drop it from the free cells and fill it in.
    self.free_cells.remove(cell)
    self.state[row][col] = move[0]
    self.last_move = move

    if not chosen.last_state:
        return

    # No further move is possible: report the outcome.
    computer_won = chosen.points == self.WIN_POINTS
    raise GameOver('Computer wins!' if computer_won else 'No one wins.')
def load_data(data_dir, order='pre_order'):
    '''
    Build the vocabularies and load every split in the requested order.

    Each line of the ``train``, ``valid`` and ``test`` files holds a
    sentence and an s-expression separated by a tab. The s-expression is
    parsed into a Tree and traversed with the Tree method named by ``order``.

    :param data_dir: directory containing the three split files
    :param order: name of the Tree traversal method to use
    :return: word, non-terminal, terminal and action vocabularies, followed
             by per-split word tokens, tree tokens and transition actions
    '''
    word_vocab = Vocab()
    nt_vocab = Vocab()
    ter_vocab = Vocab()
    act_vocab = Vocab()
    act_vocab.feed_all(['NT', 'TER', 'ACT'])

    # Per-split containers, keyed by split name.
    word_tokens = collections.defaultdict(list)
    tree_tokens = collections.defaultdict(list)
    tran_actions = collections.defaultdict(list)

    for split in ('train', 'valid', 'test'):
        print('reading', split)
        path = os.path.join(data_dir, split)
        with codecs.open(path, 'r', 'utf-8') as handle:
            for raw_line in handle:
                sentence, sexp = raw_line.rstrip().split('\t')
                words = sentence.split(' ')
                word_vocab.feed_all(words)
                word_tokens[split].append(words)

                parse_tree = Tree()
                parse_tree.construct_from_sexp(sexp)
                non_terminals, terminals = parse_tree.get_nt_ter()
                nt_vocab.feed_all(non_terminals)
                ter_vocab.feed_all(terminals)

                # Look the traversal up by name and start it at the root.
                traverse = getattr(parse_tree, order)
                tokens, actions = traverse(_ROOT)
                tree_tokens[split].append(tokens)
                tran_actions[split].append(actions)

    return (word_vocab, nt_vocab, ter_vocab, act_vocab,
            word_tokens, tree_tokens, tran_actions)