def testMultiResponseParsers(self):

    class FooParser(parsers.MultiResponseParser):

        supported_artifacts = ["Foo"]

        def ParseResponses(self, knowledge_base, responses):
            raise NotImplementedError()

    class BarParser(parsers.MultiResponseParser):

        supported_artifacts = ["Bar"]

        def ParseResponses(self, knowledge_base, responses):
            raise NotImplementedError()

    parsers.MULTI_RESPONSE_PARSER_FACTORY.Register("Foo", FooParser)
    parsers.MULTI_RESPONSE_PARSER_FACTORY.Register("Bar", BarParser)

    foo_factory = parsers.ArtifactParserFactory("Foo")
    foo_parsers = foo_factory.MultiResponseParsers()
    self.assertCountEqual(map(type, foo_parsers), [FooParser])

    bar_factory = parsers.ArtifactParserFactory("Bar")
    bar_parsers = bar_factory.MultiResponseParsers()
    self.assertCountEqual(map(type, bar_parsers), [BarParser])

def setUp(self):
    self.positive_fps = list(
        map(
            get_data_path,
            [
                "genbank_5_blanks_start_of_file",
                "genbank_single_record_upper",
                "genbank_single_record_lower",
                "genbank_multi_records",
            ],
        )
    )
    self.negative_fps = list(
        map(
            get_data_path,
            [
                "empty",
                "whitespace_only",
                "genbank_6_blanks_start_of_file",
                "genbank_w_beginning_whitespace",
                "genbank_missing_locus_name",
            ],
        )
    )

def correct_blinking(tracks, temporal, spatial):
    new_tracks = []
    init = 0
    while init < len(tracks):
        cur = {init}
        tr1 = tracks[init]
        for i in range(init+1, len(tracks)):
            tr2 = tracks[i]
            if min(tr2['frame']) - max(tr1['frame']) > temporal:
                break
            elif distance((tr1['xmean'], tr1['ymean']),
                          (tr2['xmean'], tr2['ymean'])) <= spatial:
                cur.add(i)
        new_tracks.append(cur)
        init += 1
        cur = {init}
    first = new_tracks[::-1]
    second = map(lambda a: a | reduce(lambda b, c: b | c,
                                      filter(lambda d: d & a, first)), first)
    while second != first:
        first = second
        second = map(lambda a: a | reduce(lambda b, c: b | c,
                                          filter(lambda d: d & a, first)), first)
    second = np.unique([tuple(a) for a in second])
    tracks = [join_tracks([tracks[i] for i in links]) for links in second]
    with open('outfile.txt', 'w') as outf:
        outf.write('\n\n'.join([("%s:\t%s" % (k, v))
                                for k, v in tracks[100].items()]))
    return tracks

def test_roundtrip_tabular_msa(self):
    fps = list(map(lambda e: list(map(get_data_path, e)),
                   [('empty', 'empty'),
                    ('fasta_tabular_msa_different_type',
                     'qual_tabular_msa_different_type')]))

    reader = partial(_fasta_to_tabular_msa, constructor=CustomSequence)
    writer = _tabular_msa_to_fasta

    for fasta_fp, qual_fp in fps:
        # read
        obj1 = reader(fasta_fp, qual=qual_fp)

        # write
        fasta_fh = io.StringIO()
        qual_fh = io.StringIO()
        writer(obj1, fasta_fh, qual=qual_fh)
        fasta_fh.seek(0)
        qual_fh.seek(0)

        # read
        obj2 = reader(fasta_fh, qual=qual_fp)
        fasta_fh.close()
        qual_fh.close()

        self.assertEqual(obj1, obj2)

def test_roundtrip_biological_sequences(self):
    fps = list(map(lambda e: list(map(get_data_path, e)),
                   [('fasta_multi_seq_roundtrip',
                     'qual_multi_seq_roundtrip'),
                    ('fasta_sequence_collection_different_type',
                     'qual_sequence_collection_different_type')]))

    for reader, writer in ((_fasta_to_biological_sequence,
                            _biological_sequence_to_fasta),
                           (partial(_fasta_to_dna_sequence, validate=False),
                            _dna_sequence_to_fasta),
                           (partial(_fasta_to_rna_sequence, validate=False),
                            _rna_sequence_to_fasta),
                           (partial(_fasta_to_protein_sequence,
                                    validate=False),
                            _protein_sequence_to_fasta)):
        for fasta_fp, qual_fp in fps:
            # read
            obj1 = reader(fasta_fp, qual=qual_fp)

            # write
            fasta_fh = io.StringIO()
            qual_fh = io.StringIO()
            writer(obj1, fasta_fh, qual=qual_fh)
            fasta_fh.seek(0)
            qual_fh.seek(0)

            # read
            obj2 = reader(fasta_fh, qual=qual_fh)
            fasta_fh.close()
            qual_fh.close()

            self.assertEqual(obj1, obj2)

def testLargeNumberOfBlobs(self):

    def Blobs(prefix):
        for idx in range(1337):
            yield prefix + str(idx).encode("ascii")

    foo_blobs = list(Blobs(b"foo"))
    foo_blob_ids = list(map(rdf_objects.BlobID.FromBlobData, foo_blobs))
    foo_hash_id = rdf_objects.SHA256HashID.FromData(b"".join(foo_blobs))
    data_store.BLOBS.WriteBlobs(dict(zip(foo_blob_ids, foo_blobs)))

    bar_blobs = list(Blobs(b"bar"))
    bar_blob_ids = list(map(rdf_objects.BlobID.FromBlobData, bar_blobs))
    bar_hash_id = rdf_objects.SHA256HashID.FromData(b"".join(bar_blobs))
    data_store.BLOBS.WriteBlobs(dict(zip(bar_blob_ids, bar_blobs)))

    client_id = self.SetupClient(0).Basename()
    foo_path = db.ClientPath.OS(client_id=client_id, components=("foo",))
    bar_path = db.ClientPath.OS(client_id=client_id, components=("bar",))

    with mock.patch.object(file_store, "_BLOBS_READ_BATCH_SIZE", 42):
        hash_ids = file_store.AddFilesWithUnknownHashes({
            foo_path: foo_blob_ids,
            bar_path: bar_blob_ids,
        })

    self.assertLen(hash_ids, 2)
    self.assertEqual(hash_ids[foo_path], foo_hash_id)
    self.assertEqual(hash_ids[bar_path], bar_hash_id)

def test_roundtrip_generators(self):
    # test that fasta and qual files can be streamed into memory and back
    # out to disk using generator reader and writer
    fps = list(map(lambda e: list(map(get_data_path, e)),
                   [('empty', 'empty'),
                    ('fasta_multi_seq_roundtrip',
                     'qual_multi_seq_roundtrip')]))

    for fasta_fp, qual_fp in fps:
        with io.open(fasta_fp) as fh:
            exp_fasta = fh.read()
        with io.open(qual_fp) as fh:
            exp_qual = fh.read()

        fasta_fh = io.StringIO()
        qual_fh = io.StringIO()
        _generator_to_fasta(_fasta_to_generator(fasta_fp, qual=qual_fp),
                            fasta_fh, qual=qual_fh)
        obs_fasta = fasta_fh.getvalue()
        obs_qual = qual_fh.getvalue()
        fasta_fh.close()
        qual_fh.close()

        self.assertEqual(obs_fasta, exp_fasta)
        self.assertEqual(obs_qual, exp_qual)

def testSimpleOverlappingBlobIds(self):
    foo_blobs = [b"foo", b"norf", b"quux", b"thud"]
    bar_blobs = [b"bar", b"norf", b"blag", b"thud"]

    foo_blob_ids = list(map(rdf_objects.BlobID.FromBlobData, foo_blobs))
    foo_hash_id = rdf_objects.SHA256HashID.FromData(b"".join(foo_blobs))

    bar_blob_ids = list(map(rdf_objects.BlobID.FromBlobData, bar_blobs))
    bar_hash_id = rdf_objects.SHA256HashID.FromData(b"".join(bar_blobs))

    data_store.BLOBS.WriteBlobs(dict(zip(foo_blob_ids, foo_blobs)))
    data_store.BLOBS.WriteBlobs(dict(zip(bar_blob_ids, bar_blobs)))

    client_id = self.SetupClient(0).Basename()
    foo_path = db.ClientPath.OS(client_id=client_id, components=("foo", "quux"))
    bar_path = db.ClientPath.OS(client_id=client_id, components=("bar", "blag"))

    hash_ids = file_store.AddFilesWithUnknownHashes({
        foo_path: foo_blob_ids,
        bar_path: bar_blob_ids,
    })

    self.assertLen(hash_ids, 2)
    self.assertEqual(hash_ids[foo_path], foo_hash_id)
    self.assertEqual(hash_ids[bar_path], bar_hash_id)

def testWithFiles(self):
    foo = temp.AutoTempFilePath(suffix="foo")
    bar = temp.AutoTempFilePath(suffix="bar")
    baz = temp.AutoTempFilePath(suffix="baz")

    with context.MultiContext([foo, bar, baz]) as filepaths:
        self.assertLen(filepaths, 3)
        self.assertEndsWith(filepaths[0], "foo")
        self.assertEndsWith(filepaths[1], "bar")
        self.assertEndsWith(filepaths[2], "baz")

        wbopen = functools.partial(io.open, mode="wb")
        with context.MultiContext(map(wbopen, filepaths)) as filedescs:
            self.assertLen(filedescs, 3)
            filedescs[0].write(b"FOO")
            filedescs[1].write(b"BAR")
            filedescs[2].write(b"BAZ")

        # At this point all three files should be correctly written, closed
        # and ready for reading.

        rbopen = functools.partial(io.open, mode="rb")
        with context.MultiContext(map(rbopen, filepaths)) as filedescs:
            self.assertLen(filedescs, 3)
            self.assertEqual(filedescs[0].read(), b"FOO")
            self.assertEqual(filedescs[1].read(), b"BAR")
            self.assertEqual(filedescs[2].read(), b"BAZ")

def test_collate(self):
    u"""Test collate_iters function"""
    indicies = [index(i) for i in [0, 1, 2, 3]]
    helper = lambda i: indicies[i]

    makeiter1 = lambda: iter(indicies)
    makeiter2 = lambda: map(helper, [0, 1, 3])
    makeiter3 = lambda: map(helper, [1, 2])

    outiter = patchdir.collate_iters([makeiter1(), makeiter2()])
    assert Iter.equal(outiter,
                      iter([(indicies[0], indicies[0]),
                            (indicies[1], indicies[1]),
                            (indicies[2], None),
                            (indicies[3], indicies[3])]))

    assert Iter.equal(patchdir.collate_iters([makeiter1(),
                                              makeiter2(),
                                              makeiter3()]),
                      iter([(indicies[0], indicies[0], None),
                            (indicies[1], indicies[1], indicies[1]),
                            (indicies[2], None, indicies[2]),
                            (indicies[3], indicies[3], None)]), 1)

    assert Iter.equal(patchdir.collate_iters([makeiter1(), iter([])]),
                      map(lambda i: (i, None), indicies))

    assert Iter.equal(map(lambda i: (i, None), indicies),
                      patchdir.collate_iters([makeiter1(), iter([])]))

def testMultiFileParsers(self):

    class FooParser(parsers.MultiFileParser):

        supported_artifacts = ["Quux", "Norf"]

        def ParseFiles(self, knowledge_base, pathspecs, filedescs):
            raise NotImplementedError()

    class BarParser(parsers.MultiFileParser):

        supported_artifacts = ["Quux", "Thud"]

        def ParseFiles(self, knowledge_base, pathspecs, filedescs):
            raise NotImplementedError()

    parsers.MULTI_FILE_PARSER_FACTORY.Register("Foo", FooParser)
    parsers.MULTI_FILE_PARSER_FACTORY.Register("Bar", BarParser)

    quux_factory = parsers.ArtifactParserFactory("Quux")
    quux_parsers = quux_factory.MultiFileParsers()
    self.assertCountEqual(map(type, quux_parsers), [FooParser, BarParser])

    norf_factory = parsers.ArtifactParserFactory("Norf")
    norf_parsers = norf_factory.MultiFileParsers()
    self.assertCountEqual(map(type, norf_parsers), [FooParser])

    thud_factory = parsers.ArtifactParserFactory("Thud")
    thud_parsers = thud_factory.MultiFileParsers()
    self.assertCountEqual(map(type, thud_parsers), [BarParser])

def test_roundtrip_sequence_collections_and_alignments(self):
    fps = list(map(lambda e: list(map(get_data_path, e)),
                   [('empty', 'empty'),
                    ('fasta_sequence_collection_different_type',
                     'qual_sequence_collection_different_type')]))

    for reader, writer in ((_fasta_to_sequence_collection,
                            _sequence_collection_to_fasta),
                           (_fasta_to_alignment, _alignment_to_fasta)):
        for fasta_fp, qual_fp in fps:
            # read
            obj1 = reader(fasta_fp, qual=qual_fp)

            # write
            fasta_fh = io.StringIO()
            qual_fh = io.StringIO()
            writer(obj1, fasta_fh, qual=qual_fh)
            fasta_fh.seek(0)
            qual_fh.seek(0)

            # read
            obj2 = reader(fasta_fh, qual=qual_fh)
            fasta_fh.close()
            qual_fh.close()

            self.assertEqual(obj1, obj2)

def LookupClients(self, keywords):
    """Returns a list of client URNs associated with keywords.

    Args:
      keywords: The list of keywords to search by.

    Returns:
      A list of client URNs.

    Raises:
      ValueError: A string (single keyword) was passed instead of an iterable.
    """
    if isinstance(keywords, string_types):
        raise ValueError(
            "Keywords should be an iterable, not a string (got %s)." % keywords)

    start_time, filtered_keywords = self._AnalyzeKeywords(keywords)

    keyword_map = data_store.REL_DB.ListClientsForKeywords(
        list(map(self._NormalizeKeyword, filtered_keywords)),
        start_time=start_time)

    relevant_set = functools.reduce(operator.and_,
                                    map(set, itervalues(keyword_map)))
    return sorted(relevant_set)

def test_roundtrip_sequence_collections_and_alignments(self):
    fps = list(map(lambda e: list(map(get_data_path, e)),
                   [('empty', 'empty'),
                    ('fasta_sequence_collection_different_type',
                     'qual_sequence_collection_different_type')]))

    for reader, writer in ((_fasta_to_sequence_collection,
                            _sequence_collection_to_fasta),
                           (_fasta_to_alignment, _alignment_to_fasta)):
        for fasta_fp, qual_fp in fps:
            # read
            obj1 = reader(fasta_fp, qual=qual_fp)

            # write
            fasta_fh = StringIO()
            qual_fh = StringIO()
            writer(obj1, fasta_fh, qual=qual_fh)
            fasta_fh.seek(0)
            qual_fh.seek(0)

            # read
            obj2 = reader(fasta_fh, qual=qual_fh)
            fasta_fh.close()
            qual_fh.close()

            # TODO remove this custom equality testing code when
            # SequenceCollection has an equals method (part of #656).
            # We need this method to include IDs and description in the
            # comparison (not part of SequenceCollection.__eq__).
            self.assertEqual(obj1, obj2)
            for s1, s2 in zip(obj1, obj2):
                self.assertTrue(s1.equals(s2))

def correct_blinking(tracks, temporal, spatial):
    new_tracks = []
    init = 0
    while init < len(tracks):
        cur = {init}
        tr1 = tracks[init]
        for i in range(init + 1, len(tracks)):
            tr2 = tracks[i]
            if min(tr2['frame']) - max(tr1['frame']) > temporal:
                break
            elif distance((tr1['xmean'], tr1['ymean']),
                          (tr2['xmean'], tr2['ymean'])) <= spatial:
                cur.add(i)
        new_tracks.append(cur)
        init += 1
        cur = {init}
    first = new_tracks[::-1]
    second = map(
        lambda a: a | reduce(lambda b, c: b | c,
                             filter(lambda d: d & a, first)),
        first)
    while second != first:
        first = second
        second = map(
            lambda a: a | reduce(lambda b, c: b | c,
                                 filter(lambda d: d & a, first)),
            first)
    second = np.unique([tuple(a) for a in second])
    tracks = [join_tracks([tracks[i] for i in links]) for links in second]
    with open('outfile.txt', 'w') as outf:
        outf.write('\n\n'.join([("%s:\t%s" % (k, v))
                                for k, v in tracks[100].items()]))
    return tracks

def testMultiFileParsers(self):

    class FooParser(parser.MultiFileParser):

        supported_artifacts = ["Quux", "Norf"]

        def ParseFiles(self, knowledge_base, pathspecs, filedescs):
            raise NotImplementedError()

    class BarParser(parser.MultiFileParser):

        supported_artifacts = ["Quux", "Thud"]

        def ParseFiles(self, knowledge_base, pathspecs, filedescs):
            raise NotImplementedError()

    parsers.MULTI_FILE_PARSER_FACTORY.Register("Foo", FooParser)
    parsers.MULTI_FILE_PARSER_FACTORY.Register("Bar", BarParser)

    quux_factory = parsers.ArtifactParserFactory("Quux")
    quux_parsers = quux_factory.MultiFileParsers()
    self.assertCountEqual(map(type, quux_parsers), [FooParser, BarParser])

    norf_factory = parsers.ArtifactParserFactory("Norf")
    norf_parsers = norf_factory.MultiFileParsers()
    self.assertCountEqual(map(type, norf_parsers), [FooParser])

    thud_factory = parsers.ArtifactParserFactory("Thud")
    thud_parsers = thud_factory.MultiFileParsers()
    self.assertCountEqual(map(type, thud_parsers), [BarParser])

def testMultiResponseParsers(self):

    class FooParser(parser.MultiResponseParser):

        supported_artifacts = ["Foo"]

        def ParseResponses(self, knowledge_base, responses):
            raise NotImplementedError()

    class BarParser(parser.MultiResponseParser):

        supported_artifacts = ["Bar"]

        def ParseResponses(self, knowledge_base, responses):
            raise NotImplementedError()

    parsers.MULTI_RESPONSE_PARSER_FACTORY.Register("Foo", FooParser)
    parsers.MULTI_RESPONSE_PARSER_FACTORY.Register("Bar", BarParser)

    foo_factory = parsers.ArtifactParserFactory("Foo")
    foo_parsers = foo_factory.MultiResponseParsers()
    self.assertCountEqual(map(type, foo_parsers), [FooParser])

    bar_factory = parsers.ArtifactParserFactory("Bar")
    bar_parsers = bar_factory.MultiResponseParsers()
    self.assertCountEqual(map(type, bar_parsers), [BarParser])

def LookupClients(self, keywords):
    """Returns a list of client URNs associated with keywords.

    Args:
      keywords: The list of keywords to search by.

    Returns:
      A list of client URNs.

    Raises:
      ValueError: A string (single keyword) was passed instead of an iterable.
    """
    if isinstance(keywords, string_types):
        raise ValueError(
            "Keywords should be an iterable, not a string (got %s)." % keywords)

    start_time, end_time, filtered_keywords, unversioned_keywords = (
        self._AnalyzeKeywords(keywords))

    last_seen_map = None
    if unversioned_keywords:
        last_seen_map = {}

    # TODO(user): Make keyword index datetime aware so that
    # AsMicrosecondsSinceEpoch is unnecessary.

    raw_results = self.Lookup(
        list(map(self._NormalizeKeyword, filtered_keywords)),
        start_time=start_time.AsMicrosecondsSinceEpoch(),
        end_time=end_time.AsMicrosecondsSinceEpoch(),
        last_seen_map=last_seen_map)
    if not raw_results:
        return []

    if unversioned_keywords:
        universal_last_seen_raw = {}
        self.ReadPostingLists(
            list(map(self._NormalizeKeyword, raw_results)),
            start_time=start_time.AsMicrosecondsSinceEpoch(),
            end_time=end_time.AsMicrosecondsSinceEpoch(),
            last_seen_map=universal_last_seen_raw)

        universal_last_seen = {}
        for (_, client_id), ts in iteritems(universal_last_seen_raw):
            universal_last_seen[client_id] = ts

        old_results = set()
        for keyword in unversioned_keywords:
            for result in raw_results:
                if last_seen_map[(keyword, result)] < universal_last_seen[result]:
                    old_results.add(result)
        raw_results -= old_results

    return [rdf_client.ClientURN(result) for result in raw_results]

def deep_force_unicode(value):
    """
    Recursively call force_text on value.
    """
    if isinstance(value, (list, tuple, set)):
        value = type(value)(map(deep_force_unicode, value))
    elif isinstance(value, dict):
        value = type(value)(map(deep_force_unicode, value.items()))
    elif isinstance(value, Promise):
        value = force_text(value)
    return value

def guess_lang(text):
    if all(map(lambda c: ord(c) < 128, text)):
        lang = "en"
    else:
        if any(map(lambda c: 0x0800 <= ord(c) <= 0x4e00, text)):
            lang = "jp"
        else:
            lang = "zh"
    return lang

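# Hedged usage sketch for guess_lang above (the sample strings are assumptions,
# not taken from the source): all-ASCII text maps to "en", any character inside
# the 0x0800-0x4E00 range (kana, CJK symbols/punctuation) maps to "jp", and
# remaining non-ASCII text falls through to "zh".
assert guess_lang("hello world") == "en"
assert guess_lang("こんにちは") == "jp"  # hiragana code points fall inside 0x0800-0x4E00
assert guess_lang("你好世界") == "zh"    # common hanzi sit above 0x4E00
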
def undigest(self, blocks):
    """undigest(blocks : [string]) : string

    Perform the reverse package transformation on a list of message
    blocks. Note that the ciphermodule used for both transformations
    must be the same. blocks is a list of strings of bit length
    equal to the ciphermodule's block_size.
    """
    # better have at least 2 blocks, for the padbytes package and the hash
    # block accumulator
    if len(blocks) < 2:
        raise ValueError("List must be at least length 2.")

    # blocks is a list of strings. We need to deal with them as long
    # integers
    blocks = list(map(bytes_to_long, blocks))

    # Calculate the well-known key, to which the hash blocks are
    # encrypted, and create the hash cipher.
    K0 = self.__K0digit * self.__key_size
    hcipher = self.__newcipher(K0)
    block_size = self.__ciphermodule.block_size

    # Since we have all the blocks (or this method would have been called
    # prematurely), we can calculate all the hash blocks.
    hashes = []
    for i in range(1, len(blocks)):
        mticki = blocks[i - 1] ^ i
        hi = hcipher.encrypt(long_to_bytes(mticki, block_size))
        hashes.append(bytes_to_long(hi))

    # now we can calculate K' (key). remember the last block contains
    # m's' which we don't include here
    key = blocks[-1] ^ reduce(operator.xor, hashes)

    # and now we can create the cipher object
    mcipher = self.__newcipher(long_to_bytes(key, self.__key_size))

    # And we can now decode the original message blocks
    parts = []
    for i in range(1, len(blocks)):
        cipherblock = mcipher.encrypt(long_to_bytes(i, block_size))
        mi = blocks[i - 1] ^ bytes_to_long(cipherblock)
        parts.append(mi)

    # The last message block contains the number of pad bytes appended to
    # the original text string, such that its length was an even multiple
    # of the cipher's block_size. This number should be small enough that
    # the conversion from long integer to integer should never overflow
    padbytes = int(parts[-1])
    text = b('').join(map(long_to_bytes, parts[:-1]))

    return text[:-padbytes]

def undigest(self, blocks):
    """undigest(blocks : [string]) : string

    Perform the reverse package transformation on a list of message
    blocks. Note that the ciphermodule used for both transformations
    must be the same. blocks is a list of strings of bit length
    equal to the ciphermodule's block_size.
    """
    # better have at least 2 blocks, for the padbytes package and the hash
    # block accumulator
    if len(blocks) < 2:
        raise ValueError("List must be at least length 2.")

    # blocks is a list of strings. We need to deal with them as long
    # integers
    blocks = list(map(bytes_to_long, blocks))

    # Calculate the well-known key, to which the hash blocks are
    # encrypted, and create the hash cipher.
    K0 = self.__K0digit * self.__key_size
    hcipher = self.__newcipher(K0)
    block_size = self.__ciphermodule.block_size

    # Since we have all the blocks (or this method would have been called
    # prematurely), we can calculate all the hash blocks.
    hashes = []
    for i in range(1, len(blocks)):
        mticki = blocks[i-1] ^ i
        hi = hcipher.encrypt(long_to_bytes(mticki, block_size))
        hashes.append(bytes_to_long(hi))

    # now we can calculate K' (key). remember the last block contains
    # m's' which we don't include here
    key = blocks[-1] ^ reduce(operator.xor, hashes)

    # and now we can create the cipher object
    mcipher = self.__newcipher(long_to_bytes(key, self.__key_size))

    # And we can now decode the original message blocks
    parts = []
    for i in range(1, len(blocks)):
        cipherblock = mcipher.encrypt(long_to_bytes(i, block_size))
        mi = blocks[i-1] ^ bytes_to_long(cipherblock)
        parts.append(mi)

    # The last message block contains the number of pad bytes appended to
    # the original text string, such that its length was an even multiple
    # of the cipher's block_size. This number should be small enough that
    # the conversion from long integer to integer should never overflow
    padbytes = int(parts[-1])
    text = b('').join(map(long_to_bytes, parts[:-1]))

    return text[:-padbytes]

def setUp(self):
    self.positive_fps = list(map(get_data_path, [
        'genbank_5_blanks_start_of_file',
        'genbank_single_record_upper',
        'genbank_single_record_lower',
        'genbank_multi_records']))
    self.negative_fps = list(map(get_data_path, [
        'empty',
        'whitespace_only',
        'genbank_6_blanks_start_of_file',
        'genbank_w_beginning_whitespace',
        'genbank_missing_locus_name']))

def setUp(self):
    self.positive_fps = list(
        map(get_data_path, [
            'genbank_5_blanks_start_of_file',
            'genbank_single_record_upper',
            'genbank_single_record_lower',
            'genbank_multi_records'
        ]))
    self.negative_fps = list(
        map(get_data_path, [
            'empty',
            'whitespace_only',
            'genbank_6_blanks_start_of_file',
            'genbank_w_beginning_whitespace',
            'genbank_missing_locus_name'
        ]))

def from_json_dict(cls, schema_dict, validate=True):
    # type: (Dict[str, Any], bool) -> Schema
    """Make a Schema object from a dictionary.

    :param schema_dict: This dictionary must have a `'features'` key
        specifying the columns of the dataset. It must have a `'version'`
        key containing the master schema version that this schema conforms
        to. It must have a `'hash'` key with all the globals.
    :param validate: (default True) Raise an exception if the schema does
        not conform to the master schema.
    :return: The resulting :class:`Schema` object.
    """
    if validate:
        # This raises iff the schema is invalid.
        validate_schema_dict(schema_dict)

    hash_properties = GlobalHashingProperties.from_json_dict(
        schema_dict['clkConfig'])
    features = schema_dict['features']

    return cls(
        schema_dict['version'],
        hash_properties,
        list(map(spec_from_json_dict, features))
    )

def recommend_playlist(self):
    try:
        action = 'http://music.163.com/weapi/v1/discovery/recommend/songs?csrf_token='  # NOQA
        self.session.cookies.load()
        csrf = ''
        for cookie in self.session.cookies:
            if cookie.name == '__csrf':
                csrf = cookie.value
        if csrf == '':
            return False

        action += csrf
        req = {'offset': 0, 'total': True, 'limit': 20, 'csrf_token': csrf}
        page = self.session.post(action,
                                 data=encrypted_request(req),
                                 headers=self.header,
                                 timeout=default_timeout)
        results = json.loads(page.text)['recommend']
        song_ids = []
        for result in results:
            song_ids.append(result['id'])
        data = map(self.song_detail, song_ids)
        return [d[0] for d in data]
    except (requests.exceptions.RequestException, ValueError) as e:
        log.error(e)
        return False

def ProcessCollectedRegistryStatEntry(self, responses):
    """Create AFF4 objects for registry statentries.

    We need to do this explicitly because we call StatFile client action
    directly for performance reasons rather than using one of the flows
    that do this step automatically.

    Args:
      responses: Response objects from the artifact source.
    """
    if not responses.success:
        self.CallStateInline(next_state="ProcessCollected", responses=responses)
        return

    with data_store.DB.GetMutationPool() as pool:
        stat_entries = list(map(rdf_client_fs.StatEntry, responses))
        filesystem.WriteStatEntries(
            stat_entries,
            client_id=self.client_id,
            mutation_pool=pool,
            token=self.token)

    self.CallStateInline(
        next_state="ProcessCollected",
        request_data=responses.request_data,
        messages=stat_entries)

def update_contacts(contacts):
    contacts = map(_transform_contact_data, contacts)

    # Filter contact data using whitelist
    if settings.EMARSYS_RECIPIENT_WHITELIST is not None:
        contacts = filter(lambda contact: contact[3]  # 3=email
                          in settings.EMARSYS_RECIPIENT_WHITELIST,
                          contacts)

    contacts = list(contacts)

    assert len(contacts) <= BATCH_SIZE

    if not contacts:
        return 0, [], []

    num_successful, errors = _update_contacts(contacts)

    missing_contacts = [email for email, error_dict in errors.items()
                        if '2008' in error_dict]
    failed_contacts = [(email, error_dict)
                       for email, error_dict in errors.items()
                       if '2008' not in error_dict]

    return num_successful, missing_contacts, failed_contacts

def _floatWithFormat(self, string, format_string, scale=1):
    ndigits, ndec = list(map(int, format_string.split('.')))
    nint = ndigits - ndec
    val = self._float(string[0:nint] + '.' + string[nint:nint + ndec])
    if val is not None:
        val *= scale
    return val

def WMITimeStrToRDFDatetime(self, timestr):
    """Return RDFDatetime from string like 20140825162259.000000-420.

    Args:
      timestr: WMI time string

    Returns:
      rdfvalue.RDFDatetime

    We have some timezone manipulation work to do here because the UTC offset
    is in minutes rather than +-HHMM
    """
    # We use manual parsing here because the time functions provided (datetime,
    # dateutil) do not properly deal with timezone information.
    offset_minutes = timestr[21:]
    year = timestr[:4]
    month = timestr[4:6]
    day = timestr[6:8]
    hours = timestr[8:10]
    minutes = timestr[10:12]
    seconds = timestr[12:14]
    microseconds = timestr[15:21]

    unix_seconds = calendar.timegm(
        tuple(map(int, [year, month, day, hours, minutes, seconds])))
    unix_seconds -= int(offset_minutes) * 60
    return rdfvalue.RDFDatetime(unix_seconds * 1e6 + int(microseconds))

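# Hedged standalone sketch (not part of the original class): the same manual
# slicing applied to the docstring's sample value, showing where each field
# sits in the WMI string. Only the stdlib calendar module is required.
import calendar

timestr = "20140825162259.000000-420"
fields = [timestr[:4], timestr[4:6], timestr[6:8],
          timestr[8:10], timestr[10:12], timestr[12:14]]
unix_seconds = calendar.timegm(tuple(map(int, fields)))  # 2014-08-25 16:22:59 taken as UTC
unix_seconds -= int(timestr[21:]) * 60  # "-420" is the UTC offset in minutes
microseconds = int(timestr[15:21])      # "000000"
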
def get_brief_from_a_card(card_tag):
    release_date, _ = try_evaluate(
        lambda: datetime.datetime.strptime(
            re.search(r"\d\d\d\d-\d\d-\d\d", card_tag.text).group(0),
            "%Y-%m-%d"
        )
    )
    actress = list(
        map(
            lambda x: x.text,
            card_tag.find_all(name="a", attrs={"class": "btn-danger"}),
        )
    )
    img, _ = try_evaluate(lambda: card_tag.find(name="img").attrs["data-src"])
    if not img.startswith("http"):
        img = "http:" + img
    brief = Brief()
    brief.preview_img_url = img
    brief.title, _ = try_evaluate(lambda: card_tag.find(name="h5").text.strip(), "")
    brief.actress = ", ".join(actress)
    brief.set_release_date(release_date)
    brief.code = card_tag.find(name="h4").text.strip()
    return brief

def LookupClients(self, keywords):
    """Returns a list of client URNs associated with keywords.

    Args:
      keywords: The list of keywords to search by.

    Returns:
      A list of client URNs.

    Raises:
      ValueError: A string (single keyword) was passed instead of an iterable.
    """
    if isinstance(keywords, string_types):
        raise ValueError(
            "Keywords should be an iterable, not a string (got %s)." % keywords)

    start_time, filtered_keywords = self._AnalyzeKeywords(keywords)

    keyword_map = data_store.REL_DB.ListClientsForKeywords(
        list(map(self._NormalizeKeyword, filtered_keywords)),
        start_time=start_time)

    results = itervalues(keyword_map)
    relevant_set = set(next(results))

    for hits in results:
        relevant_set &= set(hits)

        if not relevant_set:
            return []

    return sorted(relevant_set)

def WriteStatEntries(stat_entries, client_id, mutation_pool, token=None):
    """Persists information about stat entries.

    Args:
      stat_entries: A list of `StatEntry` instances.
      client_id: An id of a client the stat entries come from.
      mutation_pool: A mutation pool used for writing into the AFF4 data store.
      token: A token used for writing into the AFF4 data store.
    """
    for stat_response in stat_entries:
        if stat_response.pathspec.last.stream_name:
            # This is an ads. In that case we always need to create a file or
            # we won't be able to access the data. New clients send the correct
            # mode already but to make sure, we set this to a regular file
            # anyways.
            # Clear all file type bits:
            stat_response.st_mode &= ~stat_type_mask
            stat_response.st_mode |= stat.S_IFREG

    if data_store.AFF4Enabled():
        for stat_entry in stat_entries:
            CreateAFF4Object(
                stat_entry,
                client_id_urn=rdf_client.ClientURN(client_id),
                mutation_pool=mutation_pool,
                token=token)

    if data_store.RelationalDBWriteEnabled():
        path_infos = list(map(rdf_objects.PathInfo.FromStatEntry, stat_entries))
        data_store.REL_DB.WritePathInfos(client_id, path_infos)

def writelines(self, list):
    # XXX We could do better here for very long lists
    # XXX Should really reject non-string non-buffers
    self._wbuf.extend([_f for _f in map(str, list) if _f])
    if (self._wbufsize <= 1 or
            self._get_wbuf_len() >= self._wbufsize):
        self.flush()

def test_derivs(self):
    """Test can take derivs"""
    dt = 0.1
    true_derivs = []
    num_vecs = len(self.basis_vec_handles)
    for i in range(num_vecs):
        true_derivs.append((self.A_on_basis_vec_handles[i].get() -
                            self.basis_vec_handles[i].get()).squeeze() / dt)
    deriv_handles = [V.VecHandleArrayText(join(self.test_dir,
                                               'deriv_test%d' % i))
                     for i in range(num_vecs)]
    LGP.compute_derivs_handles(self.basis_vec_handles,
                               self.A_on_basis_vec_handles,
                               deriv_handles, dt)
    derivs_loaded = [v.get() for v in deriv_handles]
    derivs_loaded = list(map(np.squeeze, derivs_loaded))
    list(map(np.testing.assert_allclose, derivs_loaded, true_derivs))

def List(self, responses):
    """Collect the directory listing and store in the datastore."""
    if not responses.success:
        raise flow.FlowError(str(responses.status))

    self.Log("Listed %s", self.state.urn)

    with data_store.DB.GetMutationPool() as pool:
        if data_store.AFF4Enabled():
            with aff4.FACTORY.Create(
                    self.state.urn,
                    standard.VFSDirectory,
                    mode="w",
                    mutation_pool=pool,
                    token=self.token) as fd:
                fd.Set(fd.Schema.PATHSPEC(self.state.stat.pathspec))
                fd.Set(fd.Schema.STAT(self.state.stat))

        if data_store.RelationalDBEnabled():
            path_info = rdf_objects.PathInfo.FromStatEntry(self.state.stat)
            data_store.REL_DB.WritePathInfos(self.client_id, [path_info])

        stat_entries = list(map(rdf_client_fs.StatEntry, responses))
        WriteStatEntries(
            stat_entries,
            client_id=self.client_id,
            mutation_pool=pool,
            token=self.token)

        for stat_entry in stat_entries:
            self.SendReply(stat_entry)  # Send Stats to parent flows.

def StoreDirectory(self, responses):
    """Stores all stat responses."""
    stat_entries = list(map(rdf_client_fs.StatEntry, responses))
    WriteStatEntries(stat_entries, client_id=self.client_id)

    for stat_entry in stat_entries:
        self.SendReply(stat_entry)  # Send Stats to parent flows.

def request(url, params={}, headers={}, data=None, method=None):
    if params:
        url = "".join([url, "?", urlencode(params)])

    req = urllib.request.Request(url)
    if method:
        req.get_method = lambda: method
    req.add_header("User-Agent", USER_AGENT)
    req.add_header("Accept-Encoding", "gzip")
    for k, v in list(headers.items()):
        req.add_header(k, v)
    if data:
        req.add_data(data)

    try:
        with closing(urllib.request.urlopen(req)) as response:
            data = response.read()
            if response.headers.get("Content-Encoding", "") == "gzip":
                import zlib
                data = zlib.decompressobj(16 + zlib.MAX_WBITS).decompress(data)
            response.data = data
            response.json = lambda: parse_json(data)
            response.xml = lambda: parse_xml(data)
            return response
    except Exception as e:
        import traceback
        list(map(log.error, traceback.format_exc().split("\n")))
        notify("%s: %s" % (getLocalizedString(30224), repr(e).encode('utf-8')))
        return None, None

def _typelist(x):
    """Helper function converting all items of x to instances."""
    if isinstance(x, collections.Sequence):
        return list(map(_to_instance, x))
    elif isinstance(x, collections.Iterable):
        return x
    return None if x is None else [_to_instance(x)]

def List(self, responses):
    """Collect the directory listing and store in the datastore."""
    if not responses.success:
        raise flow.FlowError(str(responses.status))

    self.Log("Listed %s", self.state.urn)

    with data_store.DB.GetMutationPool() as pool:
        if data_store.AFF4Enabled():
            with aff4.FACTORY.Create(
                    self.state.urn,
                    standard.VFSDirectory,
                    mode="w",
                    mutation_pool=pool,
                    token=self.token) as fd:
                fd.Set(fd.Schema.PATHSPEC(self.state.stat.pathspec))
                fd.Set(fd.Schema.STAT(self.state.stat))

        if data_store.RelationalDBWriteEnabled():
            path_info = rdf_objects.PathInfo.FromStatEntry(self.state.stat)
            data_store.REL_DB.WritePathInfos(self.client_id, [path_info])

        stat_entries = list(map(rdf_client_fs.StatEntry, responses))
        WriteStatEntries(
            stat_entries,
            client_id=self.client_id,
            mutation_pool=pool,
            token=self.token)

        for stat_entry in stat_entries:
            self.SendReply(stat_entry)  # Send Stats to parent flows.

def _floatWithFormat(self, string, format_string, scale=1):
    ndigits, ndec = list(map(int, format_string.split(".")))
    nint = ndigits - ndec
    val = self._float(string[0:nint] + "." + string[nint : nint + ndec])
    if val is not None:
        val *= scale
    return val

def GET(self):
    qdict = web.input()
    # date parameter filters the log values returned; "yyyy-mm-dd" format
    thedate = qdict[u"date"]
    theday = datetime.date(*map(int, thedate.split(u"-")))
    prevday = theday - datetime.timedelta(days=1)
    prevdate = prevday.strftime(u"%Y-%m-%d")
    records = read_log()
    data = []
    for event in records:
        # return any records starting on this date
        if u"date" not in qdict or event[u"date"] == thedate:
            data.append(event)
        # also return any records starting the day before and completing
        # after midnight
        if event[u"date"] == prevdate:
            if (
                int(event[u"start"].split(":")[0]) * 60
                + int(event[u"start"].split(u":")[1])
                + int(event[u"duration"].split(u":")[0])
                > 24 * 60
            ):
                data.append(event)
    web.header(u"Content-Type", u"application/json")
    return json.dumps(data)

def run(self, suites):
    """
    Run the given test case or test suite.
    """
    results = {}
    time_taken = 0
    keys = sorted(suites.keys())
    for id in keys:
        test = suites[id]
        result = self._makeResult()
        start = time.time()
        test(result)
        stop = time.time()
        results[id] = result
        total = stop - start
        results[id].__dict__['timetaken'] = total
        if self.timeit:
            self.stream.writeln('')
            self.stream.write("obspy.%s: " % (id))
            num = test.countTestCases()
            try:
                avg = float(total) / num
            except:
                avg = 0
            msg = '%d tests in %.3fs (average of %.4fs per test)'
            self.stream.writeln(msg % (num, total, avg))
            self.stream.writeln('')
        time_taken += total
    runs = 0
    faileds = 0
    erroreds = 0
    wasSuccessful = True
    if self.verbosity:
        self.stream.writeln()
    for result in list(results.values()):
        failed, errored = list(map(len, (result.failures, result.errors)))
        faileds += failed
        erroreds += errored
        if not result.wasSuccessful():
            wasSuccessful = False
            result.printErrors()
        runs += result.testsRun
    if self.verbosity:
        self.stream.writeln(unittest._TextTestResult.separator2)
        self.stream.writeln("Ran %d test%s in %.3fs" %
                            (runs, runs != 1 and "s" or "", time_taken))
        self.stream.writeln()
    if not wasSuccessful:
        self.stream.write("FAILED (")
        if faileds:
            self.stream.write("failures=%d" % faileds)
        if erroreds:
            if faileds:
                self.stream.write(", ")
            self.stream.write("errors=%d" % erroreds)
        self.stream.writeln(")")
    elif self.verbosity:
        self.stream.writeln("OK")
    return results, time_taken, (faileds + erroreds)

def exec_cmd(command, *args, **kwargs):
    cmd = shlex.split(command)
    cmd.extend(map(convert.to_bytes, args))
    xargs = {'bufsize': -1,
             'stdout': PIPE,
             'stderr': PIPE}
    xargs.update(kwargs)
    return Popen(cmd, **xargs)

def get_title_id(self, level, begin=1):
    x = self.titles_ids.setdefault(level, 0) + 1
    self.titles_ids[level] = x
    _ids = []
    for x in range(begin, level+1):
        y = self.titles_ids.setdefault(x, 0)
        _ids.append(y)
    return 'title_%s' % '-'.join(map(str, _ids))

def __init__(self, features, tolerant=False, sparse=True):
    # Upgrade `features` to `Feature` instances.
    features = list(map(make_feature, features))
    if tolerant:
        self.evaluator = TolerantFeatureEvaluator(features)
    else:
        self.evaluator = FeatureEvaluator(features)
    self.flattener = FeatureMappingFlattener(sparse=sparse)

def reduce_by_key_t(func):
    return Transformation(
        'reduce_by_key({0})'.format(name(func)),
        lambda sequence: map(
            lambda kv: (kv[0], reduce(func, kv[1])),
            group_by_key_impl(sequence)
        ),
        None
    )

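# Hedged stdlib-only illustration of the reduce-by-key idea used above
# (group_by_key_impl and Transformation are library internals, so this sketch
# regroups a plain list of (key, value) pairs instead):
from functools import reduce
from itertools import groupby
from operator import add

pairs = [("a", 1), ("b", 2), ("a", 3)]
grouped = [(k, [v for _, v in g])
           for k, g in groupby(sorted(pairs), key=lambda kv: kv[0])]
reduced = list(map(lambda kv: (kv[0], reduce(add, kv[1])), grouped))
# reduced == [("a", 4), ("b", 2)]
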
def find_template(self, name, context, peeking=False):
    """
    Replacement for Django's ``find_template`` that uses the current
    template context to keep track of which template directories it
    has used when finding a template. This allows multiple templates
    with the same relative name/path to be discovered, so that
    circular template inheritance can occur.
    """

    # These imports want settings, which aren't available when this
    # module is imported to ``add_to_builtins``, so do them here.
    import django.template.loaders.app_directories as app_directories
    from mezzanine.conf import settings

    # Store a dictionary in the template context mapping template
    # names to the lists of template directories available to
    # search for that template. Each time a template is loaded, its
    # origin directory is removed from its directories list.
    context_name = "OVEREXTENDS_DIRS"
    if context_name not in context:
        context[context_name] = {}
    if name not in context[context_name]:
        all_dirs = (
            list(chain.from_iterable(
                [template_engine.get('DIRS', [])
                 for template_engine in settings.TEMPLATES])) +
            list(app_directories.get_app_template_dirs('templates')))
        # os.path.abspath is needed under uWSGI, and also ensures we
        # have consistent path separators across different OSes.
        context[context_name][name] = list(map(os.path.abspath, all_dirs))

    # Build a list of template loaders to use. For loaders that wrap
    # other loaders like the ``cached`` template loader, unwind its
    # internal loaders and add those instead.
    loaders = []
    for loader in context.template.engine.template_loaders:
        loaders.extend(getattr(loader, "loaders", [loader]))

    # Go through the loaders and try to find the template. When
    # found, removed its absolute path from the context dict so
    # that it won't be used again when the same relative name/path
    # is requested.
    for loader in loaders:
        dirs = context[context_name][name]
        try:
            source, path = loader.load_template_source(name, dirs)
        except TemplateDoesNotExist:
            pass
        else:
            # Only remove the absolute path for the initial call in
            # get_parent, and not when we're peeking during the
            # second call.
            if not peeking:
                remove_path = os.path.abspath(path[:-len(name) - 1])
                context[context_name][name].remove(remove_path)
            return Template(source)
    raise TemplateDoesNotExist(name)

def save(self, *args, **kwargs):
    """
    Validate that the rating falls between the min and max values.
    """
    valid = map(str, settings.RATINGS_RANGE)
    if str(self.value) not in valid:
        raise ValueError("Invalid rating. %s is not in %s" %
                         (self.value, ", ".join(valid)))
    super(Rating, self).save(*args, **kwargs)