def test_constructor_with_nothing(self):
    """Constructing an OrderedSet with no arguments must not raise."""
    _ = OrderedSet()
def test_constructor(self):
    """Constructing an OrderedSet from a list must not raise."""
    initial_items = [1, 2, 3, 4]
    OrderedSet(initial_items)
def test_uniqueness(self):
    """Repeated input items appear only once in the resulting set."""
    with_duplicates = [1, 3, 2, 4, 2, 1, 4, 5]
    numbers = OrderedSet(with_duplicates)
    unique_sorted = sorted(numbers)
    self.assertEqual(unique_sorted, [1, 2, 3, 4, 5])
def ghetto_import():
    """Attach an uploaded .torrent file to an already-known torrent row.

    Only requests coming from 127.0.0.1 are served; anything else is aborted
    with HTTP 403.  The uploaded file (form field ``torrent``) is decoded and
    validated, looked up by info hash, and - if the matching database row has
    no torrent data yet - its bencoded info dict, file list and trackers are
    stored.

    :return: ``'success'`` on success, otherwise a ``(message, 500)`` tuple
        describing why the import was rejected
    """
    if flask.request.remote_addr != '127.0.0.1':
        # flask has no ``error`` helper; ``abort`` raises the proper
        # HTTPException for the given status code.
        flask.abort(403)

    torrent_file = flask.request.files.get('torrent')
    if torrent_file is None:
        # Without this guard the decoder would be handed None.
        return 'Missing torrent file', 500

    try:
        torrent_dict = bencode.decode(torrent_file)
    except (bencode.MalformedBencodeException, UnicodeError):
        return 'Malformed torrent file', 500

    try:
        forms._validate_torrent_metadata(torrent_dict)
    except AssertionError as e:
        return 'Malformed torrent metadata ({})'.format(e.args[0]), 500

    try:
        # Only the validation side effect is needed; the return value is
        # not used by this import path.
        forms._validate_trackers(torrent_dict)
    except AssertionError as e:
        return 'Malformed torrent trackers ({})'.format(e.args[0]), 500

    bencoded_info_dict = bencode.encode(torrent_dict['info'])
    info_hash = utils.sha1_hash(bencoded_info_dict)

    # Check if the info_hash exists already in the database
    torrent = models.Torrent.by_info_hash(info_hash)
    if not torrent:
        return 'This torrent does not exists', 500

    if torrent.has_torrent:
        return 'This torrent already has_torrent', 500

    # Torrent is legit, pass original filename and dict along
    torrent_data = forms.TorrentFileData(
        filename=os.path.basename(torrent_file.filename),
        torrent_dict=torrent_dict,
        info_hash=info_hash,
        bencoded_info_dict=bencoded_info_dict)

    # The torrent has been validated and is safe to access with ['foo'] etc -
    # all relevant keys and values have been checked for (see UploadForm in
    # forms.py for details)
    info_dict = torrent_data.torrent_dict['info']

    changed_to_utf8 = backend._replace_utf8_values(torrent_data.torrent_dict)

    torrent_filesize = info_dict.get('length') or sum(
        f['length'] for f in info_dict.get('files'))

    # In case no encoding, assume UTF-8.
    torrent_encoding = torrent_data.torrent_dict.get(
        'encoding', b'utf-8').decode('utf-8')

    # Store bencoded info_dict
    torrent.info = models.TorrentInfo(
        info_dict=torrent_data.bencoded_info_dict)
    torrent.has_torrent = True

    # To simplify parsing the filelist, turn single-file torrent into a list
    torrent_filelist = info_dict.get('files')

    used_path_encoding = 'utf-8' if changed_to_utf8 else torrent_encoding

    parsed_file_tree = dict()
    if not torrent_filelist:
        # If single-file, the root will be the file-tree (no directory)
        file_tree_root = parsed_file_tree
        torrent_filelist = [{
            'length': torrent_filesize,
            'path': [info_dict['name']]
        }]
    else:
        # If multi-file, use the directory name as root for files
        file_tree_root = parsed_file_tree.setdefault(
            info_dict['name'].decode(used_path_encoding), {})

    # Parse file dicts into a tree
    for file_dict in torrent_filelist:
        # Decode path parts from bytes
        path_parts = [
            path_part.decode(used_path_encoding)
            for path_part in file_dict['path']
        ]

        filename = path_parts.pop()
        current_directory = file_tree_root

        for directory in path_parts:
            current_directory = current_directory.setdefault(directory, {})

        # Don't add empty filenames (BitComet directory)
        if filename:
            current_directory[filename] = file_dict['length']

    parsed_file_tree = utils.sorted_pathdict(parsed_file_tree)

    json_bytes = json.dumps(parsed_file_tree,
                            separators=(',', ':')).encode('utf8')
    torrent.filelist = models.TorrentFilelist(filelist_blob=json_bytes)

    db.session.add(torrent)
    # Flush so torrent.id is available for the tracker references below.
    db.session.flush()

    # Store the users trackers
    trackers = OrderedSet()
    announce = torrent_data.torrent_dict.get('announce', b'').decode('ascii')
    if announce:
        trackers.add(announce)

    # List of lists with single item
    announce_list = torrent_data.torrent_dict.get('announce-list', [])
    for announce in announce_list:
        trackers.add(announce[0].decode('ascii'))

    # Remove our trackers, maybe? TODO ?

    # Search for/Add trackers in DB
    db_trackers = OrderedSet()
    for announce in trackers:
        tracker = models.Trackers.by_uri(announce)

        # Insert new tracker if not found
        if not tracker:
            tracker = models.Trackers(uri=announce)
            db.session.add(tracker)
            db.session.flush()

        db_trackers.add(tracker)

    # Store tracker refs in DB
    for order, tracker in enumerate(db_trackers):
        torrent_tracker = models.TorrentTrackers(torrent_id=torrent.id,
                                                 tracker_id=tracker.id,
                                                 order=order)
        db.session.add(torrent_tracker)

    db.session.commit()

    return 'success'
def _improvement_callback(route_data, callback_datastructures, sweep_rhos,
                          sweep_phis, sweep_J_pos, step_inc, routed):
    """ This callback implements the Gillett and Miller (1974) improvement
    heuristic. It involves trying to remove a node and insertion of several
    candidates. Therefore, the algorithm involves Steps 8-15 in the appendix
    of Gillett and Miller (1974).

    :param route_data: 4-tuple unpacked as (route, cost, demand, nodes)
    :param callback_datastructures: 9-tuple unpacked as
        (N, D, NN_D, d, C, L, node_to_pos, pos_to_node, avr)
    :param sweep_rhos: array indexed by sweep position (used in Step 8)
    :param sweep_phis: array indexed by sweep position (used in Step 8)
    :param sweep_J_pos: sweep position of the current node J
    :param step_inc: sweep step increment (its sign gives the direction)
    :param routed: per-node flags telling if a node is already routed
    :return: 4-tuple (route data, nodes added, nodes to ignore, completion
        flag); the route data is ``route_data`` itself when nothing changed
    """

    # unpack callback data structures (packed in pack_datastructures)
    N, D, NN_D, d, C, L, node_to_pos, pos_to_node, avr = callback_datastructures

    # do not try to improve, if the J node is already routed (already Swept
    # full circle)
    node_J = pos_to_node[sweep_J_pos]
    if routed[node_J]:
        # nothing to do, no nodes added, no nodes removed, route is complete
        return route_data, [], [], True

    # unpack route information, (route, route_cost, route_demand)
    D1_route, D1, D1_demand, D1_nodes = route_data

    # G&M Step 8. This messy looking line vectorizes the minimization of
    #  R(K(I))+An(K(I))*AVR
    #
    # What makes it a little more messier, is that the angle is pointing
    #  "the right way" depending on the cw/ccw direction (encoded in step_inc)
    # +1 is there to convert indexing as there is no depot in node_rho_phis
    route_K_nodes = list(D1_nodes[1:])
    route_K_positions = [node_to_pos[n] for n in route_K_nodes]
    route_rhos = sweep_rhos[route_K_positions]
    route_phis = sweep_phis[route_K_positions]
    rem_choose_function = route_rhos + route_phis * avr
    to_remove_node_KII = route_K_nodes[np.argmin(rem_choose_function)]

    if __debug__:
        log(
            DEBUG - 2,
            "G&M improvement phase for route %s (%.2f). Trying to replace KII=n%d."
            % (str(D1_route), D1, to_remove_node_KII))
        # zip() is lazy on Python 3; materialize it so the log shows the
        # values instead of "<zip object at ...>".
        log(
            DEBUG - 3, "This is due to R(K(I))+An(K(I)*AVR = %s" % str(
                list(zip(route_K_nodes, route_phis,
                         list(rem_choose_function)))))

    # take the node J-1 (almost always the node last added on the route)
    sweep_prev_of_J_pos = _step(sweep_J_pos, -step_inc, N - 2)
    prev_node_J = pos_to_node[sweep_prev_of_J_pos]

    # Get the insertion candidates
    try:
        candidate_node_JJX = next((node_idx
                                   for node_idx, _ in NN_D[prev_node_J]
                                   if not routed[node_idx]))
    except StopIteration:
        if __debug__:
            log(DEBUG - 2,
                "G&M Step 9, not enough unrouted nodes left for JJX.")
            log(DEBUG - 2, "-> EXIT with no changes")
        return route_data, [], [], False
    try:
        candidate_node_JII = next(
            (node_idx for node_idx, _ in NN_D[candidate_node_JJX]
             if (not routed[node_idx] and node_idx != candidate_node_JJX)))
    except StopIteration:
        candidate_node_JII = None

    # construct and route to get the modified route cost D2
    D2_route_nodes = OrderedSet(D1_nodes)
    D2_route_nodes.remove(to_remove_node_KII)
    D2_route_nodes.add(candidate_node_JJX)
    D2_route, D2 = solve_tsp(D, list(D2_route_nodes))
    D2_demand = D1_demand - d[to_remove_node_KII] + d[
        candidate_node_JJX] if C else 0

    ## G&M Step 9
    # NOTE(review): as written, a falsy L or C makes this test reject the
    # move - confirm that is intended for unconstrained problems.
    if not ((L and D2 - S_EPS < L) and (C and D2_demand - C_EPS <= C)):
        if __debug__:
            log(
                DEBUG - 2,
                "G&M Step 9, rejecting replacement of KII=n%d with JJX=n%d" %
                (to_remove_node_KII, candidate_node_JJX))
            log(
                DEBUG - 3, " which would have formed a route %s (%.2f)" %
                (str(D2_route), D2))
            if (C and D2_demand - C_EPS > C):
                log(DEBUG - 3, " violating the capacity constraint")
            else:
                log(DEBUG - 3,
                    " violating the maximum route cost constraint")
            log(DEBUG - 2, " -> EXIT with no changes")

        # go to G&M Step 10 ->
        # no changes, no skipping, route complete
        return route_data, [], [], True

    ## G&M Step 11
    D3_nodes = OrderedSet()  # the min. dist. from 0 through J,J+1...J+4 to J+5
    D4_nodes = OrderedSet()  # the min. dist. /w JJX excluded, KII included
    D6_nodes = OrderedSet()  # the min. dist. /w JJX and JII excl., KII incl.
    JJX_in_chain = False
    JII_in_chain = False

    # step back so that the first node to lookahead is J
    lookahead_pos = sweep_prev_of_J_pos
    for _ in range(5):
        lookahead_pos = _step(lookahead_pos, step_inc, N - 2)
        lookahead_node = pos_to_node[lookahead_pos]
        if routed[lookahead_node]:
            continue

        D3_nodes.add(lookahead_node)
        if lookahead_node == candidate_node_JJX:
            # inject KII instead of JJX
            D4_nodes.add(to_remove_node_KII)
            D6_nodes.add(to_remove_node_KII)
            JJX_in_chain = True
        elif lookahead_node == candidate_node_JII:
            D4_nodes.add(lookahead_node)
            JII_in_chain = True
        else:
            D4_nodes.add(lookahead_node)
            D6_nodes.add(lookahead_node)

    # if JJX was not in the sequence J, J+1, ... J+5
    if not JJX_in_chain:
        if __debug__:
            log(
                DEBUG - 2, "G&M Step 11, JJX=n%d not in K(J)..K(J+4)" %
                candidate_node_JJX)
            log(DEBUG - 3,
                " which consists of nodes %s" % str(list(D3_nodes)))
            log(DEBUG - 2, "-> EXIT with no changes")
        # go to G&M Step 10 ->
        # no changes, no skipping, route complete
        return route_data, [], [], True

    # The chain *end point* J+5
    last_chain_pos = _step(lookahead_pos, step_inc, N - 2)
    last_chain_node = pos_to_node[last_chain_pos]
    if routed[last_chain_node]:
        last_chain_node = 0

    # D3 -> EVALUATE the MINIMUM distance from 0 through J,J+1...J+4 to J+5
    _, D3 = _shortest_path_through_nodes(D, 0, last_chain_node, D3_nodes)
    # D4 -> DETERMINE the MINIMUM distance with JJX excluded, KII included
    _, D4 = _shortest_path_through_nodes(D, 0, last_chain_node, D4_nodes)

    if not (D1 + D3 < D2 + D4):
        ## G&M Step 12
        if __debug__:
            log(
                DEBUG - 2, "G&M Step 12, accept an improving move where " +
                "KII=n%d is removed and JJX=n%d is added" %
                (to_remove_node_KII, candidate_node_JJX))
            log(DEBUG - 3,
                " which forms a route %s (%.2f)" % (str(D2_route), D2))
            log(DEBUG - 2, " -> EXIT and continue adding nodes")

        ignored_nodes = [to_remove_node_KII]
        if candidate_node_JJX != node_J:
            ignored_nodes += [node_J]

        # go to G&M Step 4 ->
        # route changed, KII removed and skip current node J, not complete
        return RouteData(D2_route, D2, D2_demand, D2_route_nodes),\
               [candidate_node_JJX], ignored_nodes, False
    else:
        ## G&M Step 13
        # JII and JJX (checked earlier) should be in K(J)...K(J+4) to continue
        if not JII_in_chain:
            if __debug__:
                if candidate_node_JII is None:
                    log(DEBUG - 2,
                        "G&M Step 13, no unrouted nodes left for JII.")
                else:
                    log(
                        DEBUG - 2,
                        "G&M Step 13, JII=n%d not in K(J)..K(J+4)" %
                        candidate_node_JII)
                    log(
                        DEBUG - 3, " which consists of nodes %s" %
                        str(list(D3_nodes)))
                log(DEBUG - 2, "-> EXIT with no changes")
            # go to G&M Step 10 -> no changes, no skipping, route complete
            return route_data, [], [], True

        # construct and route to get the modified route cost D5
        # (D5_route_nodes aliases D2_route_nodes; the latter is not used
        #  again after this point)
        D5_route_nodes = D2_route_nodes
        D5_route_nodes.add(candidate_node_JII)
        D5_route, D5 = solve_tsp(D, list(D5_route_nodes))
        D5_demand = D2_demand + d[candidate_node_JII] if C else 0
        if not ((L and D5 - S_EPS < L) and (C and D5_demand - C_EPS <= C)):
            if __debug__:
                log(
                    DEBUG - 2,
                    "G&M Step 13, rejecting replacement of KII=n%d with JJX=n%d and JII=n%d"
                    % (to_remove_node_KII, candidate_node_JJX,
                       candidate_node_JII))
                log(
                    DEBUG - 3, "  which would have formed a route %s (%.2f)" %
                    (str(D5_route), D5))
                if D5_demand - C_EPS > C:
                    log(DEBUG - 3, " violating the capacity constraint")
                else:
                    log(DEBUG - 3,
                        " violating the maximum route cost constraint")
                log(DEBUG - 2, "-> EXIT with no changes")
            # go to G&M Step 10 -> no changes, no skipping, route complete
            return route_data, [], [], True

        ## G&M Step 14
        # D6 -> DETERMINE the MINIMUM distance with JJX and JII excluded and
        #  KII included
        _, D6 = _shortest_path_through_nodes(D, 0, last_chain_node, D6_nodes)

        if D1 + D3 < D5 + D6:
            if __debug__:
                log(
                    DEBUG - 2,
                    "G&M Step 14, rejecting replacement of KII=n%d with JJX=n%d and JII=n%d"
                    % (to_remove_node_KII, candidate_node_JJX,
                       candidate_node_JII))
                log(
                    DEBUG - 3, " which would have formed a route %s (%.2f)" %
                    (str(D5_route), D5))
                log(DEBUG - 2, "-> EXIT with no changes")
            # go to G&M Step 10 -> no changes, no skipping, route complete
            return route_data, [], [], True

        ## G&M Step 15
        if __debug__:
            log(
                DEBUG - 2, "G&M Step 15, accept improving move where " +
                "KII=n%d is removed and JJX=n%d and JII=n%d are added" %
                (to_remove_node_KII, candidate_node_JJX, candidate_node_JII))
            # Report the route that is actually returned (D5), not the
            # intermediate D2 route that lacks JII.
            log(DEBUG - 3,
                " which forms a route %s (%.2f)" % (str(D5_route), D5))
            log(DEBUG - 2, " -> EXIT and continue adding nodes")

        ignored_nodes = [to_remove_node_KII]
        if candidate_node_JJX != node_J and candidate_node_JII != node_J:
            ignored_nodes += [node_J]

        # go to G&M Step 4 ->
        # route changed, KII removed and skip current node J, not complete
        return RouteData(D5_route, D5, D5_demand, D5_route_nodes),\
               [candidate_node_JJX, candidate_node_JII],\
               ignored_nodes, False
def __init__(self,
             node: 'plenum.server.node.Node',
             instId: int,
             isMaster: bool = False):
    """
    Create a new replica.

    :param node: Node on which this replica is located
    :param instId: the id of the protocol instance the replica belongs to
    :param isMaster: is this a replica of the master protocol instance
    """
    HasActionQueue.__init__(self)
    self.stats = Stats(TPCStat)

    self.config = getConfig()

    # Inbox routing: request digests first, then every three-phase message
    # type through one common handler, then checkpoints and 3PC state.
    routerArgs = [(ReqDigest, self._preProcessReqDigest)]
    routerArgs.extend((msgType, self.processThreePhaseMsg)
                      for msgType in (PrePrepare, Prepare, Commit))
    routerArgs += [
        (Checkpoint, self.processCheckpoint),
        (ThreePCState, self.process3PhaseState),
    ]
    self.inBoxRouter = Router(*routerArgs)

    # Dispatch of an individual three-phase message to its own handler.
    threePhaseHandlers = (
        (PrePrepare, self.processPrePrepare),
        (Prepare, self.processPrepare),
        (Commit, self.processCommit),
    )
    self.threePhaseRouter = Router(*threePhaseHandlers)

    self.node = node
    self.instId = instId

    self.name = self.generateName(node.name, self.instId)

    # This queue is used by the replica to send messages to its node.
    # Replica puts messages that are consumed by its node
    self.outBox = deque()

    # This queue is used by the replica to receive messages from its node.
    # Node puts messages that are consumed by the replica
    self.inBox = deque()

    # If messages need to go back on the queue, they go here temporarily
    # and are put back on the queue on a state change
    self.inBoxStash = deque()

    self.isMaster = isMaster

    # Indicates name of the primary replica of this protocol instance.
    # None in case the replica does not know who the primary of the
    # instance is
    self._primaryName = None  # type: Optional[str]

    # Requests waiting to be processed once the replica is able to decide
    # whether it is primary or not
    self.postElectionMsgs = deque()

    # PRE-PREPAREs that are waiting to be processed but do not have the
    # corresponding request digest. Happens when replica has not been
    # forwarded the request by the node but is getting 3 phase messages.
    # The value is a list since a malicious entry might send PRE-PREPARE
    # with a different digest and since we dont have the request finalised,
    # we store all PRE-PPREPARES
    self.prePreparesPendingReqDigest = {}  # type: Dict[Tuple[str, int], List]

    # PREPAREs that are stored by non primary replica for which it has not
    # got any PRE-PREPARE. Dictionary that stores a tuple of view no and
    # prepare sequence number as key and a deque of PREPAREs as value.
    # This deque is attempted to be flushed on receiving every
    # PRE-PREPARE request.
    self.preparesWaitingForPrePrepare = {}
    # type: Dict[Tuple[int, int], deque]

    # COMMITs that are stored for which there are no PRE-PREPARE or PREPARE
    # received
    self.commitsWaitingForPrepare = {}
    # type: Dict[Tuple[int, int], deque]

    # Dictionary of sent PRE-PREPARE that are stored by primary replica
    # which it has broadcasted to all other non primary replicas
    # Key of dictionary is a 2 element tuple with elements viewNo,
    # pre-prepare seqNo and value is a tuple of Request Digest and time
    self.sentPrePrepares = {}
    # type: Dict[Tuple[int, int], Tuple[Tuple[str, int], float]]

    # Dictionary of received PRE-PREPAREs. Key of dictionary is a 2
    # element tuple with elements viewNo, pre-prepare seqNo and value is
    # a tuple of Request Digest and time
    self.prePrepares = {}
    # type: Dict[Tuple[int, int], Tuple[Tuple[str, int], float]]

    # Dictionary of received Prepare requests. Key of dictionary is a 2
    # element tuple with elements viewNo, seqNo and value is a 2 element
    # tuple containing request digest and set of sender node names(sender
    # replica names in case of multiple protocol instances)
    # (viewNo, seqNo) -> ((identifier, reqId), {senders})
    self.prepares = Prepares()
    # type: Dict[Tuple[int, int], Tuple[Tuple[str, int], Set[str]]]

    self.commits = Commits()
    # type: Dict[Tuple[int, int],
    # Tuple[Tuple[str, int], Set[str]]]

    # Set of tuples to keep track of ordered requests. Each tuple is
    # (viewNo, ppSeqNo)
    self.ordered = OrderedSet()  # type: OrderedSet[Tuple[int, int]]

    # Dictionary to keep track of the which replica was primary during each
    # view. Key is the view no and value is the name of the primary
    # replica during that view
    self.primaryNames = {}  # type: Dict[int, str]

    # Holds msgs that are for later views
    self.threePhaseMsgsForLaterView = deque()
    # type: deque[(ThreePhaseMsg, str)]

    # Holds tuple of view no and prepare seq no of 3-phase messages it
    # received while it was not participating
    self.stashingWhileCatchingUp = set()  # type: Set[Tuple]

    # Commits which are not being ordered since commits with lower view
    # numbers and sequence numbers have not been ordered yet. Key is the
    # viewNo and value a map of pre-prepare sequence number to commit
    self.stashedCommitsForOrdering = {}  # type: Dict[int,
    # Dict[int, Commit]]

    self.checkpoints = SortedDict(lambda k: k[0])

    self.stashingWhileOutsideWaterMarks = deque()

    # Low water mark
    self._h = 0  # type: int

    # High water mark
    self.H = self._h + self.config.LOG_SIZE  # type: int

    # Must stay after the low water mark is set: it is read through self.h
    self.lastPrePrepareSeqNo = self.h  # type: int
def gen_size_rules(self, size):
    """Build the ordered set of grammar rules producing a ``size``-bit value.

    The rules are added in a fixed order: arithmetic, division (on a
    double-width concatenation), shifts and bitwise operators, unary
    operators, then the width-changing rules (zero/sign extension from
    narrower sizes, extraction from wider sizes, concatenation of two
    halves), and finally the variables of that size and every constant.
    Width-changing rules are only emitted for source widths present in
    ``self.bit_sizes``.

    :param size: target bit width; width-changing rules exist for
        8, 16, 32 and 64
    :return: OrderedSet of rule strings
    """
    rules = OrderedSet()
    available = self.bit_sizes

    for op in ("bvadd", "bvsub", "bvmul"):
        rules.add("{0} u{0} u{0} {1}".format(size, op))

    # Division and remainder take a double-width (concatenated) dividend.
    for op in ("bvudiv", "bvsdiv", "bvurem", "bvsrem"):
        rules.add("{0} {0} u{0} u{0} bvconcat u{0} {1}".format(size, op))

    for op in ("bvshl", "bvlshr", "bvashr", "bvand", "bvor", "bvxor"):
        rules.add("{0} u{0} u{0} {1}".format(size, op))

    for op in ("bvnot", "bvneg"):
        rules.add("{0} u{0} {1}".format(size, op))

    if size in (16, 32, 64):
        narrower = [w for w in (32, 16, 8) if w < size and w in available]

        # movzx
        for width in narrower:
            rules.add("{0} {1} u{0} zero_extend".format(width, size))

        # movsx
        for width in narrower:
            rules.add("{0} {1} {0} u{0} sign_extend".format(width, size))

        # extract the low `size` bits of a wider value
        for width in (64, 32):
            if width > size and width in available:
                rules.add("{0} u{0} 0 {1} bvextract".format(width, size - 1))

        # concat of two half-width values
        half = size // 2
        if half in available:
            rules.add("{0} u{0} u{0} bvconcat".format(half))
    elif size == 8:
        # extract either of the two lowest bytes of a wider value
        for width in (64, 32, 16):
            if width in available:
                rules.add("{0} u{0} 0 7 bvextract".format(width))
                rules.add("{0} u{0} 8 15 bvextract".format(width))

    for v in self.variables:
        if self.variables[v] == size:
            rules.add(v)

    for c in self.constants:
        rules.add(c)

    return rules
def main():
    """Collect domains from --url, input files or stdin and query crt.sh.

    For each collected domain the leading ``www.`` and ``*.`` prefixes are
    stripped, the certificate-transparency results are fetched, printed
    (unless ``options.silent``) and written to the output file when one was
    requested.  Exits with status 1 if the output file cannot be opened or
    an input file is missing, not a regular file, or unreadable.
    """
    import os

    requests.packages.urllib3.disable_warnings()

    domain_list = OrderedSet()
    options = parse_arguments()

    output_fd = None
    if options.output_file is not None:
        try:
            output_fd = open(options.output_file, 'w')
        except Exception as e:
            print('Cannot open output file "{0}"'.format(
                options.output_file))
            print(str(e))
            sys.exit(1)

    try:
        if options.url is not None:
            domain_list.add(clean_domain(options.url))
        elif options.input_file_list is not None:
            # Validate every input file up front so nothing is processed
            # when one of them is unusable.
            for input_file in options.input_file_list:
                if not os.path.exists(input_file):
                    print("Input file \"{0}\" does not exist".format(
                        input_file))
                    sys.exit(1)
                if not os.path.isfile(input_file):
                    print("Input file \"{0}\" is not a valid file".format(
                        input_file))
                    sys.exit(1)
                if not os.access(input_file, os.R_OK):
                    print("Input file \"{0}\" is not readable".format(
                        input_file))
                    sys.exit(1)
            for input_file in options.input_file_list:
                with open(input_file, 'r', errors="replace") as infile:
                    for line in infile:
                        domain_list.add(clean_domain(line))
        elif check_stdin():
            for line in sys.stdin:
                domain_list.add(clean_domain(line))

        crtsh = Crtsh()
        for domain in domain_list:
            domain = parse_uri(domain)
            if domain.startswith('www.'):
                domain = domain[4:]
            if domain.startswith('*.'):
                domain = domain[2:]
            result = crtsh.get_domains(domain)
            # If it is a *.www.domain, process both cases: keep the results
            # for www.domain and add those for the bare domain instead of
            # overwriting them.
            if domain.startswith('www.'):
                result = result + crtsh.get_domains(domain[4:])
            if not options.silent:
                print_domains(result)
            # Write each domain's results exactly once (the previous
            # ``result += result`` duplicated every entry).
            if output_fd is not None:
                output_fd.writelines(result)
    finally:
        # Also runs on the sys.exit() paths above.
        if output_fd is not None:
            output_fd.close()
def placeholders(self):
    """Return the placeholder names found in ``self.content``.

    When the object has no ``content`` attribute, or it is falsy, an empty
    built-in ``set`` is returned.  Otherwise every match of
    ``self.placeholder_pattern`` in the content is wrapped in a
    ``Placeholder`` and the ``name`` of each is collected into an
    ``OrderedSet``, in the order ``re.findall`` yields the matches.
    """
    has_content = bool(getattr(self, 'content', ''))
    if has_content:
        bodies = re.findall(self.placeholder_pattern, self.content)
        return OrderedSet(Placeholder(found).name for found in bodies)
    return set()
def test_length(self):
    """len() counts every distinct item exactly once."""
    cases = (
        ([1, 2, 4, 2, 1, 4, 5], 4),
        ('hiya', 4),
        ('hello there', 7),
    )
    for items, expected_length in cases:
        self.assertEqual(len(OrderedSet(items)), expected_length)
def test_containment(self):
    """Membership tests succeed for stored items and fail for others."""
    source_items = [1, 2, 4, 2, 1, 4, 5]
    numbers = OrderedSet(source_items)
    present, absent = 2, 3
    self.assertIn(present, numbers)
    self.assertNotIn(absent, numbers)
def main_func(input_question):
    """Predict the arithmetic operator for a word problem and solve it.

    The training and test stories are loaded from the ``DATA`` directory,
    the question is split into story and query, a two-branch recurrent
    model is built, and - depending on ``sys.argv[1]`` - the model is
    trained and saved (``"train"``) or loaded and evaluated (``"test"``).
    The operator predicted for the question is then combined with the
    numbers found in ``input_question`` by ``find_answer``.

    :param input_question: the question text
    :return: whatever ``find_answer`` returns for the predicted operator
        and the numbers extracted from the question
    """
    question = input_question
    RNN = recurrent.LSTM
    EMBED_HIDDEN_SIZE = 50
    SENT_HIDDEN_SIZE = 100
    QUERY_HIDDEN_SIZE = 100
    BATCH_SIZE = 32
    EPOCHS = 10
    print('RNN / Embed / Sent / Query = {}, {}, {}, {}'.format(
        RNN, EMBED_HIDDEN_SIZE, SENT_HIDDEN_SIZE, QUERY_HIDDEN_SIZE))

    # Close the data files as soon as they are parsed.
    with open("DATA/train_LSTM_26112016", 'r', encoding='utf-8') as train_f:
        train = get_stories(train_f)
    with open("DATA/test_LSTM_26112016", 'r', encoding='utf-8') as test_f:
        test = get_stories(test_f)
    all_stories = train + test

    story, query = chunck_question(question)
    print(story)
    new_story = [
        str(token) for sentence in story for token in word_tokenize(sentence)
    ]
    n_query = [str(token) for token in word_tokenize(query)]

    # Vocabulary: every word of the corpus plus the words of the question.
    vocab = sorted(
        reduce(lambda x, y: x | y,
               (OrderedSet(s + q + [a]) for s, q, a in all_stories)))
    vocab.extend(n_query)
    vocab.extend(new_story)
    vocab_size = len(vocab) + 1

    # Answer vocabulary: only the answer tokens containing an operator.
    operator_re = re.compile(r'\+|\-|\*|/')
    vocab_answer_set = OrderedSet()
    for _, _, story_answer in all_stories:
        for item in story_answer.split():
            if operator_re.search(item):
                vocab_answer_set.add(item)
    vocab_answer = list(vocab_answer_set)
    vocab_answer_size = len(vocab_answer)

    word_idx = OrderedDict((c, i + 1) for i, c in enumerate(vocab))
    word_idx_answer = OrderedDict((c, i) for i, c in enumerate(vocab_answer))
    word_idx_operator_reverse = OrderedDict(
        (i, c) for i, c in enumerate(vocab_answer))
    story_maxlen = max(map(len, (x for x, _, _ in all_stories)))
    query_maxlen = max(map(len, (x for _, x, _ in all_stories)))

    X, Xq, Y = vectorize_stories(train, word_idx, word_idx_answer,
                                 story_maxlen, query_maxlen)
    tX, tXq, tY = vectorize_stories(test, word_idx, word_idx_answer,
                                    story_maxlen, query_maxlen)
    print("erer" + str(n_query))
    xp, xqp = vectorize(new_story, n_query, word_idx, word_idx_answer,
                        story_maxlen, query_maxlen)

    print('Build model...')
    print(vocab_size, vocab_answer_size)

    sentrnn = Sequential()
    sentrnn.add(
        Embedding(vocab_size, EMBED_HIDDEN_SIZE, input_length=story_maxlen))
    sentrnn.add(Dropout(0.3))

    qrnn = Sequential()
    qrnn.add(
        Embedding(vocab_size, EMBED_HIDDEN_SIZE, input_length=query_maxlen))
    qrnn.add(Dropout(0.3))
    qrnn.add(RNN(EMBED_HIDDEN_SIZE, return_sequences=False))
    qrnn.add(RepeatVector(story_maxlen))

    model = Sequential()
    model.add(Merge([sentrnn, qrnn], mode='sum'))
    model.add(RNN(EMBED_HIDDEN_SIZE, return_sequences=False))
    model.add(Dropout(0.3))
    model.add(Dense(vocab_answer_size, activation='softmax'))

    mode = sys.argv[1]
    if mode == "train":
        model.compile(optimizer='adam',
                      loss='categorical_crossentropy',
                      metrics=['accuracy'])
        print('Training')
        model.fit([X, Xq],
                  Y,
                  batch_size=BATCH_SIZE,
                  nb_epoch=EPOCHS,
                  validation_split=0.05)
        model.save('my_model.h5')
    elif mode == "test":
        model = load_model("my_model.h5")
        model.compile(loss='categorical_crossentropy',
                      optimizer='adam',
                      metrics=['accuracy'])
        loss, acc = model.evaluate([tX, tXq], tY, batch_size=BATCH_SIZE)
        print("Testing")
        print('Test loss / test accuracy = {:.4f} / {:.4f}'.format(loss, acc))

    # The highest-scoring output index is mapped back to its operator token.
    predictedLabels = [
        word_idx_operator_reverse[np.argsort(pr)[-1]]
        for pr in model.predict([xp, xqp])
    ]
    print(predictedLabels)

    numlist = re.findall(r"[-+]?\d*\.\d+|\d+", input_question)
    answer = find_answer(predictedLabels[0], numlist)
    print(answer)
    return answer
class Tagger:
    # Class level constants

    # Tag keywords, kept in this fixed order.
    WORDS = OrderedSet([
        'fixme',
        'todo',
        'bug',
        'hack',
        'note',
        'xxx',
    ])

    # Punctuation marks and the name each one maps to.
    MARKS = OrderedDict((
        ('!!!', 'alert'),
        ('???', 'question'),
    ))
def test_string_representations(self):
    """str() and repr() show the class name and the items in order."""
    squares = OrderedSet(base ** 2 for base in [1, 2, 3, 4])
    nothing = OrderedSet()
    rendered_squares = str(squares)
    self.assertEqual(rendered_squares, "OrderedSet([1, 4, 9, 16])")
    rendered_nothing = repr(nothing)
    self.assertEqual(rendered_nothing, "OrderedSet([])")
def complete(self):
    """Build the full grammar and store it in ``self._grammar``.

    The grammar is an ordered mapping from a non-terminal name (a string
    starting with ``$``) to a collection of productions, each production
    being a tuple of terminal and non-terminal symbols.  A fixed skeleton
    is created first and then extended from ``TYPES``, ``UNITS``,
    ``self.devices``, ``self.entities`` and the parameters of every
    function in ``self.functions``.

    Also sets ``self.num_functions`` to the total number of queries and
    actions.
    """
    self.num_functions = len(self.functions['queries']) + len(
        self.functions['actions'])

    # Fixed part of the grammar; the empty lists are filled in below.
    GRAMMAR = OrderedDict({
        '$input': [('$rule', ),
                   ('executor', '=', '$constant_Entity(tt:username)', ':',
                    '$rule'), ('policy', '$policy'),
                   ('bookkeeping', '$bookkeeping')],
        '$bookkeeping': [('special', '$special'), ('answer', '$constant_Any')],
        '$special': [(x, ) for x in SPECIAL_TOKENS],
        '$rule': [('$stream', '=>', '$action'),
                  ('$stream_join', '=>', '$action'),
                  ('now', '=>', '$table', '=>', '$action'),
                  ('now', '=>', '$action'),
                  ('$rule', 'on', '$param_passing')],
        '$policy': [('true', ':', '$policy_body'),
                    ('$filter', ':', '$policy_body')],
        '$policy_body': [('now', '=>', '$policy_action'),
                         ('$policy_query', '=>', 'notify'),
                         ('$policy_query', '=>', '$policy_action')],
        '$policy_query': [
            ('*', ),
            #('$thingpedia_device_star'),
            ('$thingpedia_queries', ),
            ('$thingpedia_queries', 'filter', '$filter')
        ],
        '$policy_action': [
            ('*', ),
            #('$thingpedia_device_star'),
            ('$thingpedia_actions', ),
            ('$thingpedia_actions', 'filter', '$filter')
        ],
        '$table': [('$thingpedia_queries', ),
                   ('(', '$table', ')', 'filter', '$filter'),
                   ('aggregate', 'min', '$out_param_Any', 'of', '(', '$table',
                    ')'),
                   ('aggregate', 'max', '$out_param_Any', 'of', '(', '$table',
                    ')'),
                   ('aggregate', 'sum', '$out_param_Any', 'of', '(', '$table',
                    ')'),
                   ('aggregate', 'avg', '$out_param_Any', 'of', '(', '$table',
                    ')'), ('aggregate', 'count', 'of', '(', '$table', ')'),
                   ('aggregate', 'argmin', '$out_param_Any',
                    '$constant_Number', ',', '$constant_Number', 'of', '(',
                    '$table', ')'),
                   ('aggregate', 'argmax', '$out_param_Any',
                    '$constant_Number', ',', '$constant_Number', 'of', '(',
                    '$table', ')'), ('$table_join', ),
                   ('window', '$constant_Number', ',', '$constant_Number',
                    'of', '(', '$stream', ')'),
                   ('timeseries', '$constant_Date', ',',
                    '$constant_Measure(ms)', 'of', '(', '$stream', ')'),
                   ('sequence', '$constant_Number', ',', '$constant_Number',
                    'of', '(', '$table', ')'),
                   ('history', '$constant_Date', ',', '$constant_Measure(ms)',
                    'of', '(', '$table', ')')],
        '$table_join': [('(', '$table', ')', 'join', '(', '$table', ')'),
                        ('$table_join', 'on', '$param_passing')],
        '$stream': [
            ('timer', 'base', '=', '$constant_Date', ',', 'interval', '=',
             '$constant_Measure(ms)'),
            (
                'attimer',
                'time',
                '=',
                '$constant_Time',
            ),
            ('monitor', '(', '$table', ')'),
            ('monitor', '(', '$table', ')', 'on', 'new', '$out_param_Any'),
            ('monitor', '(', '$table', ')', 'on', 'new', '[',
             '$out_param_list', ']'),
            ('edge', '(', '$stream', ')', 'on', '$filter'),
            ('edge', '(', '$stream', ')', 'on', 'true'),
            #('$stream_join',)
        ],
        '$stream_join': [('(', '$stream', ')', '=>', '(', '$table', ')'),
                         ('$stream_join', 'on', '$param_passing')],
        '$action': [('notify', ), ('return', ), ('$thingpedia_actions', )],
        '$thingpedia_queries': [('$thingpedia_queries', '$const_param')],
        '$thingpedia_actions': [('$thingpedia_actions', '$const_param')],
        '$param_passing': [],
        '$const_param': [],
        '$out_param_Any': [],
        '$out_param_Array(Any)': [],
        '$out_param_list': [('$out_param_Any', ),
                            ('$out_param_list', ',', '$out_param_Any')],
        '$filter': [('$or_filter', ), (
            '$filter',
            'and',
            '$or_filter',
        )],
        '$or_filter': [('$atom_filter', ), (
            'not',
            '$atom_filter',
        ), ('$or_filter', 'or', '$atom_filter')],
        '$atom_filter': [('$thingpedia_queries', '{', 'true', '}'),
                         ('$thingpedia_queries', '{', '$filter', '}')],
        '$constant_Array': [(
            '[',
            '$constant_array_values',
            ']',
        )],
        '$constant_array_values': [('$constant_Any', ),
                                   ('$constant_array_values', ',',
                                    '$constant_Any')],
        '$constant_Any': OrderedSet(),
    })

    # A quoted word list is either a single SPAN token or a sequence of
    # WORD tokens, depending on the span setting.
    if self._use_span:
        GRAMMAR['$word_list'] = [
            ('SPAN', ),
        ]
    else:
        GRAMMAR['$word_list'] = [('WORD', ), ('$word_list', 'WORD')]

    def add_type(type, value_rules, operators):
        # Register one value type: its constant productions, the filter
        # productions for each of its operators (plus in_array/contains),
        # and empty output-parameter rules for the type and arrays of it.
        # Entity types and Time are not added to $constant_Any.
        assert all(isinstance(x, tuple) for x in value_rules)
        GRAMMAR['$constant_' + type] = value_rules
        if not type.startswith('Entity(') and type != 'Time':
            GRAMMAR['$constant_Any'].add(('$constant_' + type, ))
        for op in operators:
            GRAMMAR['$atom_filter'].append(
                ('$out_param_' + type, op, '$constant_' + type))
            # FIXME reenable some day
            #GRAMMAR['$atom_filter'].add(('$out_param', op, '$out_param'))
        GRAMMAR['$atom_filter'].append(
            ('$out_param_' + type, 'in_array', '[', '$constant_' + type, ',',
             '$constant_' + type, ']'))
        GRAMMAR['$atom_filter'].append(('$out_param_Array(' + type + ')',
                                        'contains', '$constant_' + type))
        GRAMMAR['$out_param_' + type] = []
        GRAMMAR['$out_param_Array(' + type + ')'] = []
        GRAMMAR['$out_param_Any'].append(('$out_param_' + type, ))
        GRAMMAR['$out_param_Any'].append(
            ('$out_param_Array(' + type + ')', ))

    # base types
    for type, (operators, values) in TYPES.items():
        value_rules = []
        for v in values:
            if isinstance(v, tuple):
                # already a full production
                value_rules.append(v)
            elif v == 'QUOTED_STRING':
                # one numbered token per possible string argument
                for i in range(MAX_STRING_ARG_VALUES):
                    value_rules.append((v + '_' + str(i), ))
            elif v[0].isupper():
                # other capitalized names are expanded to numbered tokens
                for i in range(MAX_ARG_VALUES):
                    value_rules.append((v + '_' + str(i), ))
            else:
                value_rules.append((v, ))
        add_type(type, value_rules, operators)

    # Measures: a number with a unit, optionally chained after another
    # measure of the same base unit; they reuse the Number operators.
    for base_unit, units in UNITS.items():
        value_rules = [('$constant_Number', 'unit:' + unit) for unit in units]
        value_rules += [('$constant_Measure(' + base_unit + ')',
                         '$constant_Number', 'unit:' + unit)
                        for unit in units]
        operators, _ = TYPES['Number']
        add_type('Measure(' + base_unit + ')', value_rules, operators)
    for i in range(MAX_ARG_VALUES):
        GRAMMAR['$constant_Measure(ms)'].append(('DURATION_' + str(i), ))

    # well known entities
    add_type('Entity(tt:device)', [(device, ) for device in self.devices],
             ['=='])
    #add_type('Entity(tt:device)', [], ['='])

    # other entities
    for generic_entity, has_ner in self.entities:
        if has_ner:
            value_rules = [
                ('GENERIC_ENTITY_' + generic_entity + "_" + str(i), )
                for i in range(MAX_ARG_VALUES)
            ]
            value_rules.append(('$constant_String', ))
            value_rules.append((
                '"',
                '$word_list',
                '"',
                '^^' + generic_entity,
            ))
        else:
            value_rules = []
        add_type('Entity(' + generic_entity + ')', value_rules, ['=='])

    # maps a parameter to the list of types it can possibly have
    # over the whole Thingpedia
    param_types = OrderedDict()
    # add a parameter over the source
    param_types['source'] = OrderedSet()
    param_types['source'].add(('Entity(tt:contact)', 'out'))

    # Every function name is a production of its kind's non-terminal.
    for function_type in ('queries', 'actions'):
        for function_name, params in self.functions[function_type].items():
            GRAMMAR['$thingpedia_' + function_type].append(
                (function_name, ))

    # Collect, per parameter name, every (type, direction) it is used with,
    # after applying TYPE_RENAMES to the type or to an array's element type.
    for function_type in ('queries', 'actions'):
        for function_name, params in self.functions[function_type].items():
            for param_name, param_type, param_direction in params:
                if param_type in TYPE_RENAMES:
                    param_type = TYPE_RENAMES[param_type]
                if param_type.startswith('Array('):
                    element_type = param_type[len('Array('):-1]
                    if element_type in TYPE_RENAMES:
                        param_type = 'Array(' + TYPE_RENAMES[
                            element_type] + ')'
                if param_name not in param_types:
                    param_types[param_name] = OrderedSet()
                param_types[param_name].add((param_type, param_direction))
                if param_direction == 'in':
                    # add the corresponding in out direction too, so we can handle
                    # filters on it for policies
                    param_types[param_name].add((param_type, 'out'))

    # Turn every collected (parameter, type, direction) into productions.
    for param_name, options in param_types.items():
        for (param_type, param_direction) in options:
            if param_type.startswith('Enum('):
                enum_type = self._enum_types[param_type]
                for enum in enum_type:
                    #GRAMMAR['$atom_filter'].add(('$out_param', '==', 'enum:' + enum))
                    if param_direction == 'in':
                        GRAMMAR['$const_param'].append(
                            ('param:' + param_name + ':' + param_type, '=',
                             'enum:' + enum))
                    else:
                        # NOTE: enum filters don't follow the usual convention for filters
                        # this is because, linguistically, it does not make much sense to go
                        # through $out_param: enum parameters are often implicit
                        # one does not say "if the mode of my hvac is off", one says "if my hvac is off"
                        # (same, and worse, with booleans)
                        GRAMMAR['$atom_filter'].append(
                            ('param:' + param_name + ':' + param_type, '==',
                             'enum:' + enum))
            else:
                if param_direction == 'out':
                    if param_type != 'Boolean':
                        GRAMMAR['$out_param_' + param_type].append(
                            ('param:' + param_name + ':' + param_type, ))
                    else:
                        # booleans are filtered directly against true/false
                        GRAMMAR['$atom_filter'].append(
                            ('param:' + param_name + ':' + param_type, '==',
                             'true'))
                        GRAMMAR['$atom_filter'].append(
                            ('param:' + param_name + ':' + param_type, '==',
                             'false'))
                else:
                    # parameter passing: which output parameters may be
                    # bound to this parameter
                    if param_type == 'String':
                        GRAMMAR['$param_passing'].append(
                            ('param:' + param_name + ':' + param_type, '=',
                             '$out_param_Any'))
                        GRAMMAR['$param_passing'].append(
                            ('param:' + param_name + ':' + param_type, '=',
                             'event'))
                    elif param_type.startswith('Entity('):
                        GRAMMAR['$param_passing'].append(
                            ('param:' + param_name + ':' + param_type, '=',
                             '$out_param_' + param_type))
                        GRAMMAR['$param_passing'].append(
                            ('param:' + param_name + ':' + param_type, '=',
                             '$out_param_String'))
                    else:
                        GRAMMAR['$param_passing'].append(
                            ('param:' + param_name + ':' + param_type, '=',
                             '$out_param_' + param_type))
                # constant values an input parameter may be assigned
                if param_direction == 'in':
                    if param_type == 'Any':
                        GRAMMAR['$const_param'].append(
                            ('param:' + param_name + ':' + param_type, '=',
                             '$constant_String'))
                    elif param_type.startswith('Array('):
                        GRAMMAR['$const_param'].append(
                            ('param:' + param_name + ':' + param_type, '=',
                             '$constant_Array'))
                    elif param_type == 'Boolean':
                        GRAMMAR['$const_param'].append(
                            ('param:' + param_name + ':' + param_type, '=',
                             'true'))
                        GRAMMAR['$const_param'].append(
                            ('param:' + param_name + ':' + param_type, '=',
                             'false'))
                    else:
                        GRAMMAR['$const_param'].append(
                            ('param:' + param_name + ':' + param_type, '=',
                             '$constant_' + param_type))

    self._grammar = GRAMMAR
def send_nodes(self):
    """
    Gather the send nodes held by this object.

    :return: an OrderedSet built from what ``get_iterable`` produces for
        ``self._send_node``
    """
    from ngraph.transformers.hetr.hetr_utils import get_iterable

    nodes = get_iterable(self._send_node)
    return OrderedSet(nodes)
# Distinct values of the 'yr' column. This binding is replaced further down
# once the pivot table has been round-tripped through JSON.
yrs = df['yr'].unique()
# Recast MTH as a categorical whose categories are listed in calendar order.
df["MTH"] = pd.Categorical(df['MTH'], [
    'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct',
    'Nov', 'Dec'
])
# Sum every remaining column per (yr, ITEM_CLASS) row and per MTH column.
table = pd.pivot_table(df,
                       index=['yr', 'ITEM_CLASS'],
                       columns=['MTH'],
                       aggfunc=np.sum)
# Serialise with orient='split' and parse it straight back, which yields a
# plain dict with 'index', 'columns' and 'data' entries.
json_str = table.to_json(
    orient='split')
chk2 = json.loads(json_str)
# First / second component of every row label, de-duplicated in order.
yrs = OrderedSet([i[0] for i in chk2['index']])
items = OrderedSet([i[1] for i in chk2['index']])
# First component of every column label, de-duplicated in order.
metrics = OrderedSet([i[0] for i in chk2['columns']])
mths = [
    'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct',
    'Nov', 'Dec'
]
indices = chk2['index']
datum = chk2['data']
cols = chk2['columns']
# One value per metric: the width of a data row divided by the number of
# metrics from this one to the end of the set.
# NOTE(review): '/' is true division on Python 3, so these are floats there;
# confirm whether integer offsets were intended.
cuts = [len(datum[0]) / (len(metrics) - metrics.index(i)) for i in metrics]
# One empty dict per item, to be filled in by later code.
productDict = {k: dict() for k in items}
def _pop_or_empty(values):
    """Remove and return one element of ``values``, or '' when it is empty."""
    return values.pop() if values else ''


def deal_hetong(id, extracted_list):
    """Turn the extracted fields of one contract into output rows.

    The fields are first gathered into one OrderedSet per field name. If
    party_b is not present exactly once the contract is rejected. Otherwise
    the sizes of the sets select one of several layouts (a single row, or one
    row per party_a / project_name / low_limit / contract_name).

    Args:
        id: identifier of the contract; only used in diagnostic prints.
        extracted_list: iterable of ``(key, value)`` pairs. ``key`` must be
            one of the field names held in ``temp_dic``. Values of
            'low_limit' and 'up_limit' go through ``convert_int_or_float``
            and are scaled by their unit ('万' = 1e4, '亿' = 1e8, '元' = 1).

    Returns:
        A list of rows. Rows built in this function are ordered party_a,
        party_b, project_name, contract_name, up_limit, low_limit,
        union_member. The list is empty when party_b is not unique or when
        no layout matches.
    """
    result = list()
    temp_dic = dict([(u'party_a', OrderedSet()), (u'party_b', OrderedSet()),
                     (u'project_name', OrderedSet()),
                     (u'contract_name', OrderedSet()),
                     (u'up_limit', OrderedSet()), (u'low_limit', OrderedSet()),
                     (u'union_member', OrderedSet())])
    for key, val in extracted_list:
        if key in ['low_limit', 'up_limit']:
            val, unit = convert_int_or_float(val)
            if val:
                if unit == '万':
                    val *= 10000
                elif unit == '亿':
                    val *= 100000000
                elif unit == '元':
                    pass
                else:
                    # Unknown currency unit: report it and drop this value.
                    print('其他合同金额单位!')
                    print('id ' + str(id))
                    print(str(val) + unit)
                    continue
        temp_dic[key].add(val)

    # A contract must name exactly one party_b.
    if not len(temp_dic['party_b']) == 1:
        print('非法:合同里面乙方为空')
        print('id: ' + str(id))
        return result

    # .values()/.items() exist on both Python 2 and 3; .iteritems() does not.
    is_only_one = all(len(v) <= 1 for v in temp_dic.values())

    # Without an explicit upper limit, reuse the lower limit set (same object).
    if not temp_dic['up_limit']:
        temp_dic['up_limit'] = temp_dic['low_limit']

    ret_l = list()
    if is_only_one:
        append_something_to_final_list(temp_dic['party_a'], ret_l)
        append_something_to_final_list(temp_dic['party_b'], ret_l)
        append_something_to_final_list(temp_dic['project_name'], ret_l)
        append_something_to_final_list(temp_dic['contract_name'], ret_l)
        append_something_to_final_list(temp_dic['up_limit'], ret_l)
        append_something_to_final_list(temp_dic['low_limit'], ret_l)
        append_something_to_final_list(temp_dic['union_member'], ret_l)
    if ret_l:
        result.append(ret_l)

    # only party_a and project_name can duplicate
    # NOTE(review): inside the loops below _pop_or_empty() consumes the shared
    # sets, so from the second row on a single-valued field (e.g. party_b)
    # comes out as ''. This is the original behaviour and is kept as is;
    # confirm whether every row was meant to repeat the value.
    if len(temp_dic['party_a']) == len(
            temp_dic['project_name']) and len(temp_dic['party_a']) > 1 and len(
                temp_dic['low_limit']) == len(temp_dic['party_a']):
        # case 1: multiple party_a with multiple project name
        for party_a, pro_name, limit in zip(listify(temp_dic['party_a']),
                                            listify(temp_dic['project_name']),
                                            listify(temp_dic['low_limit'])):
            ret = [
                party_a,
                _pop_or_empty(temp_dic['party_b']),
                pro_name,
                _pop_or_empty(temp_dic['contract_name']),
                limit,
                limit,
                _pop_or_empty(temp_dic['union_member']),
            ]
            result.append(ret)
    elif len(temp_dic['project_name']) > 1 and len(
            temp_dic['project_name']) == len(temp_dic['low_limit']):
        # case 2: only project name varies
        for proj_name, limit in zip(listify(temp_dic['project_name']),
                                    listify(temp_dic['low_limit'])):
            ret = [
                _pop_or_empty(temp_dic['party_a']),
                _pop_or_empty(temp_dic['party_b']),
                proj_name,
                _pop_or_empty(temp_dic['contract_name']),
                limit,
                limit,
                _pop_or_empty(temp_dic['union_member']),
            ]
            result.append(ret)
    elif len(temp_dic['project_name']) <= 1 and len(
            temp_dic['party_a']) <= 1 and len(temp_dic['low_limit']) > 1:
        # case 3: error: multiple low_limit -> pick first
        if len(temp_dic['low_limit']) == 2:
            for limit in listify(temp_dic['low_limit']):
                ret = [
                    _pop_or_empty(temp_dic['party_a']),
                    _pop_or_empty(temp_dic['party_b']),
                    _pop_or_empty(temp_dic['project_name']),
                    _pop_or_empty(temp_dic['contract_name']),
                    limit,
                    limit,
                    _pop_or_empty(temp_dic['union_member']),
                ]
                result.append(ret)
        else:
            ret = [
                _pop_or_empty(temp_dic['party_a']),
                _pop_or_empty(temp_dic['party_b']),
                _pop_or_empty(temp_dic['project_name']),
                _pop_or_empty(temp_dic['contract_name']),
                temp_dic['up_limit'][0],
                temp_dic['low_limit'][0],
                _pop_or_empty(temp_dic['union_member']),
            ]
            result.append(ret)
    elif len(temp_dic['contract_name']) > 1 and len(
            temp_dic['party_a']) == 1 and len(
                temp_dic['project_name']) == 1 and (
                    len(temp_dic['contract_name']) == len(
                        temp_dic['low_limit'])
                    and len(temp_dic['party_b']) == 1):
        # case 4: multiple contract name
        for con_name, limit in zip(listify(temp_dic['contract_name']),
                                   listify(temp_dic['low_limit'])):
            ret = [
                _pop_or_empty(temp_dic['party_a']),
                _pop_or_empty(temp_dic['party_b']),
                _pop_or_empty(temp_dic['project_name']),
                con_name,
                limit,
                limit,
                _pop_or_empty(temp_dic['union_member']),
            ]
            result.append(ret)

    if not result:
        # No layout matched: dump what was collected to help debugging.
        print('某些field非唯一!')
        print('id ' + str(id))
        obj = dict([(k, list(v)) for k, v in temp_dic.items()])
        print(obj)
    return result
from collections import OrderedDict from orderedset import OrderedSet OP1 = OrderedSet([ "bvnot", "bvneg", ]) OP2 = OrderedSet([ "bvadd", "bvsub", "bvmul", "bvudiv", "bvsdiv", "bvurem", "bvsrem", "bvshl", "bvlshr", "bvashr", "bvand", "bvor", "bvxor", "zero_extend", "bvconcat", ]) OP3 = OrderedSet(["bvextract", "sign_extend"]) COMMUTATIVE_OPS = OrderedSet(["bvadd", "bvmul", "bvand", "bvor", "bvxor"]) NON_TERMINALS = OrderedSet([ "u8",
def handle_torrent_upload(upload_form, uploading_user=None, fromAPI=False):
    ''' Store an uploaded torrent in the database and return it.

        Steps, in order: upload ratelimit / raid-mode / rangeban checks,
        removal of an existing torrent that is marked as deleted, creation of
        the Torrent row, writing the bencoded info dict to disk, building the
        file tree, resolving trackers and webseeds, a final validation, the
        commit, and an optional backup of the uploaded .torrent file.

        May throw TorrentExtraValidationException if the form/torrent fails
        post-WTForm validation! Exception messages will also be added to their
        relevant fields on the given form.

        upload_form: the upload form; its torrent_file field carries the
            parsed torrent data.
        uploading_user: the uploading user, or None for an anonymous upload.
        fromAPI: not referenced anywhere in this function body.

        Returns the committed models.Torrent. '''

    torrent_data = upload_form.torrent_file.parsed_data

    # Anonymous uploaders and non-trusted uploaders
    no_or_new_account = (
        not uploading_user
        or (uploading_user.age < app.config['RATELIMIT_ACCOUNT_AGE']
            and not uploading_user.is_trusted))

    if app.config['RATELIMIT_UPLOADS'] and no_or_new_account:
        now, torrent_count, next_time = check_uploader_ratelimit(
            uploading_user)
        if next_time > now:
            # This will flag the dialog in upload.html red and tell API users what's wrong
            upload_form.ratelimit.errors = [
                "You've gone over the upload ratelimit."
            ]
            raise TorrentExtraValidationException()

    # Extra restrictions that only apply to anonymous uploads
    if not uploading_user:
        if app.config['RAID_MODE_LIMIT_UPLOADS']:
            # XXX TODO: rename rangebanned to something more generic
            upload_form.rangebanned.errors = [
                app.config['RAID_MODE_UPLOADS_MESSAGE']
            ]
            raise TorrentExtraValidationException()
        elif models.RangeBan.is_rangebanned(
                ip_address(flask.request.remote_addr).packed):
            upload_form.rangebanned.errors = [
                "Your IP is banned from "
                "uploading anonymously."
            ]
            raise TorrentExtraValidationException()

    # Delete existing torrent which is marked as deleted
    if torrent_data.db_id is not None:
        old_torrent = models.Torrent.by_id(torrent_data.db_id)
        db.session.delete(old_torrent)
        db.session.commit()
        # Delete physical file after transaction has been committed
        _delete_info_dict(old_torrent)

    # The torrent has been validated and is safe to access with ['foo'] etc - all relevant
    # keys and values have been checked for (see UploadForm in forms.py for details)
    info_dict = torrent_data.torrent_dict['info']

    changed_to_utf8 = _replace_utf8_values(torrent_data.torrent_dict)

    # Use uploader-given name or grab it from the torrent
    display_name = upload_form.display_name.data.strip(
    ) or info_dict['name'].decode('utf8').strip()
    information = (upload_form.information.data or '').strip()
    description = (upload_form.description.data or '').strip()

    # Sanitize fields
    display_name = sanitize_string(display_name)
    information = sanitize_string(information)
    description = sanitize_string(description)

    # Single-file torrents carry 'length'; otherwise add up the 'files' entries
    torrent_filesize = info_dict.get('length') or sum(
        f['length'] for f in info_dict.get('files'))

    # In case no encoding, assume UTF-8.
    torrent_encoding = torrent_data.torrent_dict.get('encoding',
                                                     b'utf-8').decode('utf-8')

    torrent = models.Torrent(id=torrent_data.db_id,
                             info_hash=torrent_data.info_hash,
                             display_name=display_name,
                             torrent_name=torrent_data.filename,
                             information=information,
                             description=description,
                             encoding=torrent_encoding,
                             filesize=torrent_filesize,
                             user=uploading_user,
                             uploader_ip=ip_address(
                                 flask.request.remote_addr).packed)

    # Store bencoded info_dict
    info_dict_path = torrent.info_dict_path

    info_dict_dir = os.path.dirname(info_dict_path)
    os.makedirs(info_dict_dir, exist_ok=True)

    with open(info_dict_path, 'wb') as out_file:
        out_file.write(torrent_data.bencoded_info_dict)

    torrent.stats = models.Statistic()
    torrent.has_torrent = True

    # Fields with default value will be None before first commit, so set .flags
    torrent.flags = 0

    # Uploads without a user are always anonymous
    torrent.anonymous = upload_form.is_anonymous.data if uploading_user else True
    torrent.hidden = upload_form.is_hidden.data
    torrent.remake = upload_form.is_remake.data
    torrent.complete = upload_form.is_complete.data
    # Copy trusted status from user if possible
    can_mark_trusted = uploading_user and uploading_user.is_trusted
    # To do, automatically mark trusted if user is trusted unless user specifies otherwise
    torrent.trusted = upload_form.is_trusted.data if can_mark_trusted else False

    # Only allow mods to upload locked torrents
    can_mark_locked = uploading_user and uploading_user.is_moderator
    torrent.comment_locked = upload_form.is_comment_locked.data if can_mark_locked else False

    # Set category ids
    torrent.main_category_id, torrent.sub_category_id = \
        upload_form.category.parsed_data.get_category_ids()

    # To simplify parsing the filelist, turn single-file torrent into a list
    torrent_filelist = info_dict.get('files')

    # Path parts are decoded as UTF-8 when _replace_utf8_values returned a
    # truthy value, otherwise with the torrent's declared encoding
    used_path_encoding = changed_to_utf8 and 'utf-8' or torrent_encoding

    parsed_file_tree = dict()
    if not torrent_filelist:
        # If single-file, the root will be the file-tree (no directory)
        file_tree_root = parsed_file_tree
        torrent_filelist = [{
            'length': torrent_filesize,
            'path': [info_dict['name']]
        }]
    else:
        # If multi-file, use the directory name as root for files
        file_tree_root = parsed_file_tree.setdefault(
            info_dict['name'].decode(used_path_encoding), {})

    # Parse file dicts into a tree
    for file_dict in torrent_filelist:
        # Decode path parts from utf8-bytes
        path_parts = [
            path_part.decode(used_path_encoding)
            for path_part in file_dict['path']
        ]

        # Last part is the file name, the rest are nested directories
        filename = path_parts.pop()
        current_directory = file_tree_root

        for directory in path_parts:
            current_directory = current_directory.setdefault(directory, {})

        # Don't add empty filenames (BitComet directory)
        if filename:
            current_directory[filename] = file_dict['length']

    parsed_file_tree = utils.sorted_pathdict(parsed_file_tree)

    json_bytes = json.dumps(parsed_file_tree,
                            separators=(',', ':')).encode('utf8')
    torrent.filelist = models.TorrentFilelist(filelist_blob=json_bytes)

    db.session.add(torrent)
    # Flush before torrent.id is used for the tracker references below
    db.session.flush()

    # Store the users trackers
    trackers = OrderedSet()
    announce = torrent_data.torrent_dict.get('announce', b'').decode('ascii')
    if announce:
        trackers.add(announce)

    # List of lists with single item
    announce_list = torrent_data.torrent_dict.get('announce-list', [])
    for announce in announce_list:
        trackers.add(announce[0].decode('ascii'))

    # Store webseeds
    # qBittorrent doesn't omit url-list but sets it as '' even when there are no webseeds
    webseed_list = torrent_data.torrent_dict.get('url-list') or []
    if isinstance(webseed_list, bytes):
        webseed_list = [webseed_list
                        ]  # qB doesn't contain a sole url in a list
    webseeds = OrderedSet(webseed.decode('utf-8') for webseed in webseed_list)

    # Remove our trackers, maybe? TODO ?

    # Search for/Add trackers in DB
    db_trackers = OrderedSet()
    for announce in trackers:
        tracker = models.Trackers.by_uri(announce)

        # Insert new tracker if not found
        if not tracker:
            tracker = models.Trackers(uri=announce)
            db.session.add(tracker)
            db.session.flush()
        elif tracker.is_webseed:
            # If we have an announce marked webseed (user error, malicy?), reset it.
            # Better to have "bad" announces than "hiding" proper announces in webseeds/url-list.
            tracker.is_webseed = False
            db.session.flush()

        db_trackers.add(tracker)

    # Same for webseeds
    for webseed_url in webseeds:
        webseed = models.Trackers.by_uri(webseed_url)

        if not webseed:
            webseed = models.Trackers(uri=webseed_url, is_webseed=True)
            db.session.add(webseed)
            db.session.flush()

        # Don't add trackers into webseeds
        if webseed.is_webseed:
            db_trackers.add(webseed)

    # Store tracker refs in DB
    for order, tracker in enumerate(db_trackers):
        torrent_tracker = models.TorrentTrackers(torrent_id=torrent.id,
                                                 tracker_id=tracker.id,
                                                 order=order)
        db.session.add(torrent_tracker)

    # Before final commit, validate the torrent again
    validate_torrent_post_upload(torrent, upload_form)

    # Add to tracker whitelist
    db.session.add(models.TrackerApi(torrent.info_hash, 'insert'))

    db.session.commit()

    # Store the actual torrent file as well
    torrent_file = upload_form.torrent_file.data
    if app.config.get('BACKUP_TORRENT_FOLDER'):
        # Rewind so the whole upload is written out
        torrent_file.seek(0, 0)

        torrent_dir = app.config['BACKUP_TORRENT_FOLDER']
        os.makedirs(torrent_dir, exist_ok=True)

        torrent_path = os.path.join(
            torrent_dir,
            '{}.{}'.format(torrent.id, secure_filename(torrent_file.filename)))
        torrent_file.save(torrent_path)
    torrent_file.close()

    return torrent
def __init__(
    self,
    intensities,
    lattice_ids,
    weights=None,
    min_pairs=None,
    lattice_group=None,
    dimensions=None,
    nproc=1,
):
    r"""Initialise a Target object.

    Args:
      intensities (cctbx.miller.array): The intensities on which to perform
        cosym analysis.
      lattice_ids (scitbx.array_family.flex.int): An array of equal size to
        `intensities` which maps each reflection to a given lattice (dataset).
      weights (str): Optionally include weights in the target function.
        Allowed values are `None`, "count" and "standard_error". The default
        is to use no weights. If "count" is set, then weights are equal to the
        number of pairs of reflections used in calculating each value of the
        rij matrix. If "standard_error" is used, then weights are defined as
        :math:`w_{ij} = 1/s`, where :math:`s = \sqrt{(1-r_{ij}^2)/(n-2)}`.
        See also http://www.sjsu.edu/faculty/gerstman/StatPrimer/correlation.pdf.
      min_pairs (int): Only calculate the correlation coefficient between two
        datasets if they have more than `min_pairs` of common reflections.
      lattice_group (cctbx.sgtbx.space_group): Optionally set the lattice
        group to be used in the analysis.
      dimensions (int): Optionally override the number of dimensions to be
        used in the analysis. If not set, then the number of dimensions used
        is equal to the greater of 2 or the number of symmetry operations in
        the lattice group.
      nproc (int): number of processors to use for computing the rij matrix.

    Raises:
      AssertionError: if `weights` is neither `None` nor one of the allowed
        strings, or if the reordered data is not a miller-index/double array.
    """
    if weights is not None:
        assert weights in ("count", "standard_error")
    self._weights = weights
    self._min_pairs = min_pairs
    self._nproc = nproc

    # Work on a non-anomalous copy, in the primitive setting, mapped to the
    # asymmetric unit.
    data = intensities.customized_copy(anomalous_flag=False)
    cb_op_to_primitive = data.change_of_basis_op_to_primitive_setting()
    data = data.change_basis(cb_op_to_primitive).map_to_asu()

    # Reorder everything by lattice id so each lattice is one contiguous run.
    order = flex.sort_permutation(lattice_ids)
    sorted_lattice_id = flex.select(lattice_ids, order)
    sorted_data = data.data().select(order)
    sorted_indices = data.indices().select(order)
    self._lattice_ids = sorted_lattice_id
    self._data = data.customized_copy(indices=sorted_indices, data=sorted_data)
    assert isinstance(self._data.indices(), type(flex.miller_index()))
    assert isinstance(self._data.data(), type(flex.double()))

    # construct a lookup for the separate lattices
    # (the offset at which each new lattice id first appears)
    last_id = -1
    self._lattices = flex.int()
    for n, lid in enumerate(self._lattice_ids):
        if lid != last_id:
            last_id = lid
            self._lattices.append(n)

    # Identity first, then whatever _generate_twin_operators() yields.
    self._sym_ops = OrderedSet(["x,y,z"])
    # NOTE(review): the caller's lattice_group is stored here and then
    # replaced a few lines below; presumably _generate_twin_operators() reads
    # it in between - confirm.
    self._lattice_group = lattice_group
    self._sym_ops.update(op.as_xyz() for op in self._generate_twin_operators())
    if dimensions is None:
        dimensions = max(2, len(self._sym_ops))
    self.set_dimensions(dimensions)

    # Rebuild the lattice group from the data's space group expanded by
    # every collected symmetry operation.
    self._lattice_group = copy.deepcopy(self._data.space_group())
    for sym_op in self._sym_ops:
        self._lattice_group.expand_smx(sym_op)
    self._patterson_group = self._lattice_group.build_derived_patterson_group()

    logger.debug(
        "Lattice group: %s (%i symops)"
        % (self._lattice_group.info().symbol_and_number(), len(self._lattice_group))
    )
    logger.debug(
        "Patterson group: %s" % self._patterson_group.info().symbol_and_number()
    )

    self._compute_rij_wij()
import os
import base64
import time
from urllib.parse import urlencode
from orderedset import OrderedSet
from nyaa import app
from nyaa import bencode
from nyaa import app
from nyaa import models

# Module-wide tracker set; refilled in place by read_trackers_from_file().
USED_TRACKERS = OrderedSet()


def read_trackers_from_file(file_object):
    """Reload USED_TRACKERS from an iterable of lines.

    The shared set is emptied first; every line is stripped and blank lines
    are ignored. Returns the same module-level USED_TRACKERS object.
    """
    USED_TRACKERS.clear()

    stripped_lines = (raw_line.strip() for raw_line in file_object)
    for tracker_url in stripped_lines:
        if not tracker_url:
            continue
        USED_TRACKERS.add(tracker_url)

    return USED_TRACKERS


def read_trackers():
    """Load trackers from ``trackers.txt`` under the configured BASE_DIR.

    Returns the refreshed USED_TRACKERS, or None when the file is missing.
    """
    trackers_path = os.path.join(app.config['BASE_DIR'], 'trackers.txt')

    if not os.path.exists(trackers_path):
        return None

    with open(trackers_path, 'r') as tracker_file:
        return read_trackers_from_file(tracker_file)
def _deserialize_graph(graph_pb):
    """
    Will deserialize a graph and return the list of all ops in that graph. Does not bother
    filtering down to only the original set of ops the user passed in for serialization
    (if that's what the user desired upon serializing with the serialization
    only_return_handle_ops parameter).

    Arguments:
        graph_pb: protobuf graph message exposing ``ops`` and ``edges``.

    Returns:
        The ops that carried an ``_ngraph_ser_handle`` attribute (which is
        removed from them), or every deserialized op when none carried one.

    Raises:
        ValueError: for an edge that matches none of the handled cases.
    """
    # For safety we clear this registry
    GLOBAL_AXIS_REGISTRY.clear()

    ops = list(map(protobuf_to_op, graph_pb.ops))
    # Edges refer to ops by raw uuid bytes
    uuid_lookup = {op.uuid.bytes: op for op in ops}
    for edge in graph_pb.edges:
        head_op = uuid_lookup[edge.from_uuid.uuid]
        tail_op = uuid_lookup[edge.to_uuid.uuid]
        if edge.edge_type == ops_pb.Edge.DATA:  # args
            # head feeds tail: append head to tail's argument tuple
            tail_op._args = tail_op._args + (head_op, )
        elif edge.edge_type == ops_pb.Edge.CONTROL:  # control_deps
            head_op._control_deps.add(tail_op)
        elif edge.edge_type == ops_pb.Edge.CONTAINER:
            if '_ngraph_forward' in edge.attrs:  # forward
                head_op._forward = tail_op
            else:  # ops
                # Create the child list lazily on first use
                if not hasattr(head_op, '_ops'):
                    head_op._ops = []
                head_op._ops.append(tail_op)
        elif edge.edge_type == ops_pb.Edge.OTHER:
            if '_ngraph_attribute' in edge.attrs:
                # The edge names a plain attribute of head that points at tail
                setattr(head_op,
                        edge.attrs['_ngraph_attribute'].scalar.string_val,
                        tail_op)
            elif '_ngraph_list_attribute' in edge.attrs:
                # The edge names a collection attribute of head; add tail to
                # it, creating an OrderedSet when it does not exist yet
                key = edge.attrs['_ngraph_list_attribute'].scalar.string_val
                # import pdb; pdb.set_trace()
                if hasattr(head_op, key):
                    getattr(head_op, key).add(tail_op)
                else:
                    setattr(head_op, key, OrderedSet([tail_op]))
            else:
                # NOTE(review): nesting reconstructed from a flattened
                # source; this branch is taken to belong to the OTHER case -
                # confirm it was not meant for unknown edge types in general.
                raise ValueError("Edge not mapped to op: {}".format(edge))

    # This must come after tensor has been set which occurs after edges
    # op.dtype
    for py_op, pb_op in zip(ops, graph_pb.ops):
        py_op.dtype = pb_to_dtype(pb_op.dtype)

    # Done with this and don't want it to bleed to subsequent serializations
    GLOBAL_AXIS_REGISTRY.clear()

    # Assemble list of nodes to return depending on if the user wanted to
    # return all ops or only those that were originally serialized (and
    # implicitly those upstream)
    final_ops = []
    for op in ops:
        if hasattr(op, '_ngraph_ser_handle'):
            del op._ngraph_ser_handle
            final_ops.append(op)
    if len(final_ops) > 0:
        return final_ops
    else:
        return ops
def write_output(datasets, statics, options, run_list, kind=None):
    """Write the collected results to CSV files, one file per result type.

    Does nothing when ``options.output`` is None. Otherwise, for every
    ``(testie, build, all_results)`` entry of ``datasets``, one CSV file per
    result type is created through ``npf.build_filename`` and one row is
    written for each run of ``run_list`` that has a result of that type.

    Args:
        datasets: iterable of ``(testie, build, all_results)`` triples, where
            ``all_results`` maps a run to a ``{result_type: results}`` mapping.
        statics: container of variable names to leave out of the 'x' columns;
            also forwarded to ``npf.build_filename``.
        options: object providing ``output``, ``graph_filename``,
            ``show_serie`` and ``output_columns``.
        run_list: runs to emit, in row order.
        kind: optional string; when truthy, ``kind + '-'`` is passed as the
            file name prefix.

    Returns:
        None.
    """
    if options.output is None:
        return

    # Every result type seen in any dataset, in first-seen order.
    all_result_types = OrderedSet()
    for testie, build, all_results in datasets:
        for run, run_results in all_results.items():
            for result_type, results in run_results.items():
                all_result_types.add(result_type)

    for testie, build, all_results in datasets:
        # result_type -> (file name, file object, csv writer)
        csvs = OrderedDict()
        try:
            for run in run_list:
                results_types = all_results.get(run, OrderedDict())
                for result_type in all_result_types:
                    if result_type in csvs:
                        type_filename, csvfile, wr = csvs[result_type]
                    else:
                        type_filename = npf.build_filename(
                            testie,
                            build,
                            options.output
                            if options.output != 'graph' else
                            options.graph_filename,
                            statics,
                            'csv',
                            type_str=result_type,
                            show_serie=(len(datasets) > 1
                                        or options.show_serie),
                            force_ext=True,
                            data_folder=True,
                            prefix=kind + '-' if kind else None)
                        # newline='' lets the csv module control line
                        # endings, as the csv documentation requires.
                        csvfile = open(type_filename, 'w', newline='')
                        wr = csv.writer(csvfile,
                                        delimiter=' ',
                                        quotechar='"',
                                        quoting=csv.QUOTE_MINIMAL)
                        csvs[result_type] = (type_filename, csvfile, wr)

                    result = results_types.get(result_type, None)
                    if result is None:
                        continue

                    row = []
                    for t in options.output_columns:
                        if t == 'x':
                            # Only the variables that are not static
                            for var, val in run.variables.items():
                                if var in statics:
                                    continue
                                row.append(val)
                        elif t == 'all_x':
                            for var, val in run.variables.items():
                                row.append(val)
                        elif t == 'raw':
                            row.extend(result)
                        else:
                            yval = group_val(result, t)
                            if yval is not None:
                                # Iterable values are spread over several
                                # columns, anything else takes one column.
                                try:
                                    row.extend(yval)
                                except TypeError:
                                    row.append(yval)
                    if row:
                        wr.writerow(row)

            for type_filename, _csvfile, _wr in csvs.values():
                print("Output written to %s" % type_filename)
        finally:
            # Close every file that was opened, even if a write failed.
            for _type_filename, csvfile, _wr in csvs.values():
                csvfile.close()
def differences(index, nameXml, lots_fab, lots_solveur, data_fab,
                data_solveur):
    """Compare lots the solver left out with lots it proposed; write a sheet.

    For every key returned by ``lots_oubli`` each non-proposed entry is
    paired with each proposed entry (from ``solveur_hours``) whose element
    [2] is greater. The pairs are regrouped per non-proposed lot name and
    written to a new sheet called ``index`` in 'Resultats_Propositions.xlsx'.

    Args:
        index: title of the worksheet to create.
        nameXml: value written in cell (1, 1); also passed to solveur_hours.
        lots_fab, lots_solveur, data_fab, data_solveur: forwarded unchanged
            to ``lots_oubli`` and ``solveur_hours``.

    Returns:
        None. The workbook is saved in place.
    """
    dates_non_proposees = lots_oubli(lots_fab, lots_solveur, data_fab,
                                     data_solveur)
    dates_proposees = solveur_hours(nameXml, lots_fab, lots_solveur, data_fab,
                                    data_solveur)  # in chronological order
    cles_lots = []
    res = dict([(key, []) for key in dates_non_proposees.keys()])
    '''for k,v in dates_non_proposees.items(): print(k,v)'''
    for k in dates_non_proposees.keys():
        for i in range(len(dates_non_proposees.get(k))):
            for j in range(len(dates_proposees.get(k))):
                l = []
                if (dates_non_proposees.get(k)[i][2] < dates_proposees.get(k)
                    [j][2]):  # compare the arrival dates in the workshop
                    # Elements 0, 2, 3, 4, 5 of the non-proposed entry
                    # followed by the same elements of the proposed entry.
                    l = [
                        dates_non_proposees.get(k)[i][0],
                        dates_non_proposees.get(k)[i][2],
                        dates_non_proposees.get(k)[i][3],
                        dates_non_proposees.get(k)[i][4],
                        dates_non_proposees.get(k)[i][5],
                        dates_proposees.get(k)[j][0],
                        dates_proposees.get(k)[j][2],
                        dates_proposees.get(k)[j][3],
                        dates_proposees.get(k)[j][4],
                        dates_proposees.get(k)[j][5]
                    ]
                    res[k].append(l)
                    cles_lots.append(dates_non_proposees.get(k)[i][0])
                    #print(l)
    # Keep only the keys that produced at least one pair.
    cles = []
    for k in res.keys():
        if (len(res.get(k)) > 0):
            cles.append(k)
    final = dict([(key, []) for key in cles])
    for k in final.keys():
        final[k] = res.get(k)
    #print(cles_lots)
    value = dict([(key, []) for key in cles_lots])
    l = OrderedDict([
        (key, []) for key in cles_lots
    ])  # to drop duplicates without mixing things up
    for k, v in final.items():
        print(k, v)
    for k in final.keys():
        for i in range(len(final.get(k))):
            if (final.get(k)[i][0] in value.keys()):
                l[final.get(k)[i][0]].append(k)
                l[final.get(k)[i][0]].append(
                    final.get(k)[i][0])  # name of the non-proposed lot
                l[final.get(k)[i][0]].append(
                    final.get(k)[i][1])  # fab entry date, non-proposed
                l[final.get(k)[i][0]].append(
                    final.get(k)[i][2])  # priority, non-proposed
                l[final.get(k)[i][0]].append(
                    final.get(k)[i][3])  # route, non-proposed
                l[final.get(k)[i][0]].append(
                    final.get(k)[i][4])  # processing start date, non-proposed
                value[final.get(k)[i][0]].append(
                    final.get(k)[i][5])  # name of the proposed lot
                value[final.get(k)[i][0]].append(
                    (final.get(k)[i][6] - final.get(k)[i][1]
                     ))  # time gap of the proposed lot vs. the non-proposed one
                value[final.get(k)[i][0]].append(
                    final.get(k)[i][7])  # priority, proposed
                value[final.get(k)[i][0]].append(
                    final.get(k)[i][4] - final.get(k)[i][9]
                )  # element [4] minus element [9] (the two processing start dates)
                # NOTE(review): the next two statements compute values and
                # discard them; they have no effect.
                final.get(k)[i][4] - final.get(k)[i][
                    1]  # processing start - fab entry -> non-proposed
                final.get(k)[i][9]
    #for k in l.keys():
    #print(OrderedSet(l.get(k)))

    workbook = openpyxl.load_workbook('Resultats_Propositions.xlsx')
    worksheet = workbook.create_sheet(index)
    # Header rows: "FAB" columns on the left, "SOLVEUR" columns from column 8.
    worksheet.cell(1, 1).value = nameXml
    worksheet.cell(2, 1).value = "FAB"
    worksheet.cell(3, 1).value = "Masks"
    worksheet.cell(3, 2).value = "Lots"
    worksheet.cell(3, 3).value = "Operation"
    worksheet.cell(3, 4).value = "Priority"
    worksheet.cell(3, 5).value = "RouteStep"
    worksheet.cell(2, 8).value = "SOLVEUR"
    worksheet.cell(3, 8).value = "Lots"
    worksheet.cell(3, 9).value = "Diff_Entree"
    worksheet.cell(3, 10).value = "Priority"
    #worksheet.cell(3,9).value = "RouteStep"
    worksheet.cell(3, 11).value = "Diff_Traitement"
    ligne = 4
    # NOTE(review): OrderedSet(l.get(k)) removes repeated values from a list
    # that mixes several kinds of fields, so the positions [0], [2], [3], [4]
    # shift whenever two different fields hold equal values - confirm this
    # cannot happen with real data.
    for k in value.keys():
        for i in range(len(value.get(k))):
            worksheet.cell(ligne, 1).value = OrderedSet(l.get(k))[0]  # mask
            worksheet.cell(ligne, 2).value = k  # lot
            worksheet.cell(ligne, 3).value = OrderedSet(l.get(k))[2]
            worksheet.cell(ligne,
                           4).value = OrderedSet(l.get(k))[3]  # priority
            worksheet.cell(ligne, 5).value = OrderedSet(l.get(k))[4]  # route
            worksheet.cell(ligne, i + 8).value = value.get(k)[i]  # info
        # NOTE(review): nesting reconstructed from a flattened source; the
        # row counter is assumed to advance once per lot - confirm.
        ligne = ligne + 1
    workbook.save('Resultats_Propositions.xlsx')
def convert_to_xyeb(datasets: List[Tuple['Testie', 'Build', Dataset]],
                    run_list,
                    key,
                    do_x_sort,
                    statics,
                    options,
                    max_series=None,
                    series_sort=None,
                    y_group={},
                    color=[],
                    kind=None) -> AllXYEB:
    """Reshape datasets into per-result-type lists of (x, y, e, build) series.

    ``write_output`` is called first with the same data. Then, for every
    dataset and every result type, one series is built with an entry per run
    of ``run_list``: x is the run's value for ``key`` (divided by
    ``var_divider`` when numeric), y is the grouped result and e is
    ``(mean, std, raw results)``. Runs without a result contribute NaN.

    Args:
        datasets: list of ``(testie, build, all_results)`` triples.
        run_list: runs to include, in order.
        key: variable used for the x axis.
        do_x_sort: when truthy, each series is reordered by ``np.argsort`` of
            its x values.
        statics, options, kind: only forwarded to ``write_output``.
        max_series: when truthy, keep at most this many series per type.
        series_sort: None or '' for no sorting; 'avg', 'max', 'min',
            'natsort' or 'color'; a leading '-' inverts the order; or a list
            of positions / x values / build names.
            NOTE(review): a list argument is modified in place (entries are
            replaced by resolved positions) - confirm callers expect that.
        y_group: maps a result type (or 'result') to the grouping passed to
            ``group_val``; 'mean' is used when neither key is present.
        color: values passed to ``np.argsort`` for series_sort == 'color'.

    Returns:
        OrderedDict mapping each result type to a list of
        ``(x, y, e, build)`` tuples.

    Raises:
        Exception: for an unrecognised ``series_sort`` string.
    """
    # NOTE(review): y_group and color use mutable default arguments; they
    # are only read in this function.
    write_output(datasets, statics, options, run_list, kind)
    data_types = OrderedDict()
    # Every result type seen in any dataset, in first-seen order.
    all_result_types = OrderedSet()

    for testie, build, all_results in datasets:
        for run, run_results in all_results.items():
            for result_type, results in run_results.items():
                all_result_types.add(result_type)

    for testie, build, all_results in datasets:
        # Per result type: x values, y values and (mean, std, raw) triples
        x = OrderedDict()
        y = OrderedDict()
        e = OrderedDict()
        for run in run_list:
            # A run with no variables is labelled with the build's name
            if len(run) == 0:
                xval = build.pretty_name()
            else:
                xval = run.print_variable(key, build.pretty_name())

            results_types = all_results.get(run, OrderedDict())
            for result_type in all_result_types:
                #ydiv = var_divider(testie, "result", result_type) results are now divided before
                xdiv = var_divider(testie, key)
                result = results_types.get(result_type, None)

                if xdiv != 1 and is_numeric(xval):
                    x.setdefault(result_type,
                                 []).append(get_numeric(xval) / xdiv)
                else:
                    x.setdefault(result_type, []).append(xval)

                if result is not None:
                    yval = group_val(
                        result, y_group[result_type]
                        if result_type in y_group else
                        (y_group['result']
                         if 'result' in y_group else 'mean'))
                    y.setdefault(result_type, []).append(yval)

                    std = np.std(result)
                    mean = np.mean(result)
                    e.setdefault(result_type, []).append((mean, std, result))
                else:
                    # Keep the three lists aligned when a run has no result
                    y.setdefault(result_type, []).append(np.nan)
                    e.setdefault(result_type, []).append(
                        (np.nan, np.nan, [np.nan]))

        for result_type in x.keys():
            try:
                if not do_x_sort:
                    ox = x[result_type]
                    oy = y[result_type]
                    oe = e[result_type]
                else:
                    order = np.argsort(x[result_type])
                    ox = np.array(x[result_type])[order]
                    oy = np.array(y[result_type])[order]
                    oe = [e[result_type][i] for i in order]

                data_types.setdefault(result_type, []).append(
                    (ox, oy, oe, build))
            except Exception as err:
                # The series is reported and left out; processing continues
                print("ERROR while transforming data")
                print(err)
                print("x", x[result_type])
                print("y", y[result_type])
                print("e", e[result_type])

    if series_sort is not None and series_sort != "":
        # A leading '-' asks for the reverse of the named order
        if type(series_sort) is str and series_sort.startswith('-'):
            inverted = True
            series_sort = series_sort[1:]
        else:
            inverted = False

        new_data_types = OrderedDict()
        for result_type, data in data_types.items():
            # Per-series statistics (these names shadow the builtins
            # max and min inside this loop)
            avg = []
            max = []
            min = []
            for x, y, e, build in data:
                # 'avg' actually holds the sum of y, or 0 when it is NaN
                if not np.isnan(np.sum(y)):
                    avg.append(np.sum(y))
                else:
                    avg.append(0)
                # NOTE(review): nesting reconstructed from a flattened
                # source; max/min are assumed to be recorded for every
                # series - confirm.
                max.append(np.max(y))
                min.append(np.min(y))
            if type(series_sort) is list:
                # Resolve each entry to a position within data
                ok = True
                for i, so in enumerate(series_sort):
                    if is_numeric(so):
                        o = so
                        if o >= len(data):
                            print(
                                "ERROR: sorting for %s is invalid, %d is out of range"
                                % (result_type, o))
                            ok = False
                            break
                    elif so in [x for x, y, e, build in data]:
                        o = [x for x, y, e, build in data].index(so)
                    elif so in [
                            build.pretty_name() for x, y, e, build in data
                    ]:
                        o = [build.pretty_name()
                             for x, y, e, build in data].index(so)
                    else:
                        print(
                            "ERROR: sorting for %s is invalid, %s is not in list"
                            % (result_type, so))
                        ok = False
                        break
                    series_sort[i] = o

                if ok:
                    order = series_sort
                else:
                    # Fall back to ordering by 'avg' on an invalid list
                    order = np.argsort(np.asarray(avg))
            elif series_sort == 'avg':
                order = np.argsort(np.asarray(avg))
            elif series_sort == 'max':
                # Negated, so the largest maximum comes first
                order = np.argsort(-np.asarray(max))
            elif series_sort == 'min':
                order = np.argsort(np.asarray(min))
            elif series_sort == 'natsort':
                order = natsort.index_natsorted(
                    data, key=lambda x: x[3].pretty_name())
            elif series_sort == 'color':
                order = np.argsort(color)
            else:
                raise Exception("Unknown sorting : %s" % series_sort)

            if inverted:
                order = np.flip(order, 0)

            data = [data[i] for i in order]
            new_data_types[result_type] = data
        data_types = new_data_types

    if max_series:
        # Keep only the first max_series series of every result type
        new_data_types = OrderedDict()
        for i, (result_type, data) in enumerate(data_types.items()):
            new_data_types[result_type] = data[:max_series]
        data_types = new_data_types

    return data_types
def test_iterable(self):
    # Iterating an OrderedSet must yield every element it was built from.
    expected = {1, 2, 3, 4}
    ordered = OrderedSet([1, 2, 3, 4])
    self.assertEqual(set(ordered), expected)
def handle_torrent_upload(upload_form, uploading_user=None):
    """Store an uploaded torrent in the database and return it.

    Builds the Torrent row with its info dict, statistics, flags, category,
    file tree and tracker references, commits it, and optionally backs up
    the uploaded .torrent file.

    upload_form: the upload form; its torrent_file field carries the parsed
        torrent data.
    uploading_user: the uploading user, or None for an anonymous upload.

    Returns the committed models.Torrent.
    """
    torrent_data = upload_form.torrent_file.parsed_data

    # The torrent has been validated and is safe to access with ['foo'] etc - all relevant
    # keys and values have been checked for (see UploadForm in forms.py for details)
    info_dict = torrent_data.torrent_dict['info']

    changed_to_utf8 = _replace_utf8_values(torrent_data.torrent_dict)

    # Use uploader-given name or grab it from the torrent
    display_name = upload_form.display_name.data.strip(
    ) or info_dict['name'].decode('utf8').strip()
    information = (upload_form.information.data or '').strip()
    description = (upload_form.description.data or '').strip()

    # Single-file torrents carry 'length'; otherwise add up the 'files' entries
    torrent_filesize = info_dict.get('length') or sum(
        f['length'] for f in info_dict.get('files'))

    # In case no encoding, assume UTF-8.
    torrent_encoding = torrent_data.torrent_dict.get('encoding',
                                                     b'utf-8').decode('utf-8')

    torrent = models.Torrent(info_hash=torrent_data.info_hash,
                             display_name=display_name,
                             torrent_name=torrent_data.filename,
                             information=information,
                             description=description,
                             encoding=torrent_encoding,
                             filesize=torrent_filesize,
                             user=uploading_user)

    # Store bencoded info_dict
    torrent.info = models.TorrentInfo(
        info_dict=torrent_data.bencoded_info_dict)
    torrent.stats = models.Statistic()
    torrent.has_torrent = True

    # Fields with default value will be None before first commit, so set .flags
    torrent.flags = 0

    # Uploads without a user are always anonymous
    torrent.anonymous = upload_form.is_anonymous.data if uploading_user else True
    torrent.hidden = upload_form.is_hidden.data
    torrent.remake = upload_form.is_remake.data
    torrent.complete = upload_form.is_complete.data
    # Copy trusted status from user if possible
    torrent.trusted = (uploading_user.level >= models.UserLevelType.TRUSTED
                       ) if uploading_user else False

    # Set category ids
    torrent.main_category_id, torrent.sub_category_id = upload_form.category.parsed_data.get_category_ids(
    )
    # print('Main cat id: {0}, Sub cat id: {1}'.format(
    #    torrent.main_category_id, torrent.sub_category_id))

    # To simplify parsing the filelist, turn single-file torrent into a list
    torrent_filelist = info_dict.get('files')

    # Path parts are decoded as UTF-8 when _replace_utf8_values returned a
    # truthy value, otherwise with the torrent's declared encoding
    used_path_encoding = changed_to_utf8 and 'utf-8' or torrent_encoding

    parsed_file_tree = dict()
    if not torrent_filelist:
        # If single-file, the root will be the file-tree (no directory)
        file_tree_root = parsed_file_tree
        torrent_filelist = [{
            'length': torrent_filesize,
            'path': [info_dict['name']]
        }]
    else:
        # If multi-file, use the directory name as root for files
        file_tree_root = parsed_file_tree.setdefault(
            info_dict['name'].decode(used_path_encoding), {})

    # Parse file dicts into a tree
    for file_dict in torrent_filelist:
        # Decode path parts from utf8-bytes
        path_parts = [
            path_part.decode(used_path_encoding)
            for path_part in file_dict['path']
        ]

        # Last part is the file name, the rest are nested directories
        filename = path_parts.pop()
        current_directory = file_tree_root

        for directory in path_parts:
            current_directory = current_directory.setdefault(directory, {})

        current_directory[filename] = file_dict['length']

    parsed_file_tree = utils.sorted_pathdict(parsed_file_tree)

    json_bytes = json.dumps(parsed_file_tree,
                            separators=(',', ':')).encode('utf8')
    torrent.filelist = models.TorrentFilelist(filelist_blob=json_bytes)

    db.session.add(torrent)
    # Flush before torrent.id is used for the tracker references below
    db.session.flush()

    # Store the users trackers
    trackers = OrderedSet()
    announce = torrent_data.torrent_dict.get('announce', b'').decode('ascii')
    if announce:
        trackers.add(announce)

    # List of lists with single item
    announce_list = torrent_data.torrent_dict.get('announce-list', [])
    for announce in announce_list:
        trackers.add(announce[0].decode('ascii'))

    # Remove our trackers, maybe? TODO ?

    # Search for/Add trackers in DB
    db_trackers = OrderedSet()
    for announce in trackers:
        tracker = models.Trackers.by_uri(announce)

        # Insert new tracker if not found
        if not tracker:
            tracker = models.Trackers(uri=announce)
            db.session.add(tracker)

        db_trackers.add(tracker)

    # One flush for all newly added trackers, before tracker.id is read below
    db.session.flush()

    # Store tracker refs in DB
    for order, tracker in enumerate(db_trackers):
        torrent_tracker = models.TorrentTrackers(torrent_id=torrent.id,
                                                 tracker_id=tracker.id,
                                                 order=order)
        db.session.add(torrent_tracker)

    db.session.commit()

    # Store the actual torrent file as well
    torrent_file = upload_form.torrent_file.data
    if app.config.get('BACKUP_TORRENT_FOLDER'):
        # Rewind so the whole upload is written out
        torrent_file.seek(0, 0)

        torrent_dir = app.config['BACKUP_TORRENT_FOLDER']
        if not os.path.exists(torrent_dir):
            os.makedirs(torrent_dir)

        torrent_path = os.path.join(
            torrent_dir,
            '{}.{}'.format(torrent.id, secure_filename(torrent_file.filename)))
        torrent_file.save(torrent_path)
    torrent_file.close()

    return torrent
def test_maintains_order_and_uniqueness(self):
    # Each character must be kept once, at the position of its first
    # occurrence in the source text.
    source_text = "Hello world. This string contains many characters in it."
    deduplicated = OrderedSet(source_text)
    self.assertEqual("".join(deduplicated), "Helo wrd.Thistngcamy")
def test_constructor_with_iterables(self):
    # The constructor must accept a list as well as a lazy generator.
    OrderedSet([1, 2, 3, 4])
    squares = (n**2 for n in [1, 2, 3])
    OrderedSet(squares)