def wrapped_reader(fp, mode='U', mutate_fh=False, **kwargs):
    """Open ``fp`` and lazily yield every item produced by ``reader``.

    Parameters
    ----------
    fp : object or list of objects
        File(s) handed to ``open_files``. Anything ``_is_iter_list`` rejects
        is wrapped in a one-element list first.
    mode : str, optional
        Mode forwarded to ``open_files``.
    mutate_fh : bool, optional
        If true, the handles are left wherever ``reader`` moves them. If
        false (and the input is not a plain path of a non-compound format),
        the handle positions seen by the caller are restored around every
        item that is pulled from ``reader``.
    kwargs : dict
        Forwarded unchanged to ``reader``.

    Yields
    ------
    object
        Each item generated by ``reader``.
    """
    if not _is_iter_list(fp):
        fp = [fp]

    with open_files(fp, mode) as fhs:
        generator = reader(*fhs, **kwargs)
        if mutate_fh or (not is_compound and _is_string_or_bytes(fp[0])):
            # A ``for`` loop ends cleanly when ``generator`` is exhausted.
            # Letting ``next()`` raise StopIteration inside a generator body
            # becomes a RuntimeError under PEP 479 (Python 3.7+).
            for item in generator:
                yield item
        else:
            orig_positions = [fh.tell() for fh in fhs]
            read_positions = orig_positions
            try:
                while True:
                    # Remember where the caller left the handles, then jump
                    # to where the reader stopped last time.
                    orig_positions = [fh.tell() for fh in fhs]
                    for fh, pos in zip(fhs, read_positions):
                        fh.seek(pos)
                    try:
                        next_result = next(generator)
                    except StopIteration:
                        # Exhausted: finish normally (the ``finally`` below
                        # still restores the caller's positions).
                        return
                    # Record the reader's new positions and put the handles
                    # back before handing control to the caller.
                    read_positions = [fh.tell() for fh in fhs]
                    for fh, pos in zip(fhs, orig_positions):
                        fh.seek(pos)
                    yield next_result
            finally:
                for fh, pos in zip(fhs, orig_positions):
                    fh.seek(pos)
def testFilterConsidersOffsetAndCount(self):
    """Check that ``offset`` and ``count`` select the expected approvals."""
    client_id = self.client_ids[0]

    # Create ten approval requests without granting them.
    for i in range(10):
        with test_lib.FakeTime(42 + i):
            self.RequestClientApproval(
                client_id.Basename(), reason="Request reason %d" % i)

    args = user_plugin.ApiListClientApprovalsArgs(
        client_id=client_id, offset=0, count=5)
    result = self.handler.Handle(args, token=self.token)

    # Approvals are returned newest to oldest, so the first five approvals
    # have reason 9 to 5.
    self.assertLen(result.items, 5)
    # range(5, 10) yields exactly five indices; a shorter range would make
    # zip() silently skip the last returned item.
    for item, i in zip(result.items, reversed(range(5, 10))):
        self.assertEqual(item.reason, "Request reason %d" % i)

    # When no count is specified, take all items from offset to the end.
    args = user_plugin.ApiListClientApprovalsArgs(client_id=client_id, offset=7)
    result = self.handler.Handle(args, token=self.token)

    self.assertLen(result.items, 3)
    for item, i in zip(result.items, reversed(range(0, 3))):
        self.assertEqual(item.reason, "Request reason %d" % i)
def test_retrieve_contiguous(mock_jagged_raw, columns, contiguity):
    """Exercise ``retrieve_contiguous`` with invalid and valid queries."""
    originals, ne, nc, dtype, segments, reader, rng = mock_jagged_raw
    if columns is not None:
        originals = [o[:, tuple(columns)] for o in originals]

    # sanity checks for wrong inputs: (segments, contiguity, expected message)
    invalid_queries = (
        (segments, 'wrong', 'Unknown contiguity scheme:'),
        ([(-1, 1)], contiguity, 'Out of bounds query (base=-1, size=1'),
        ([(0, 100000)], contiguity,
         'Out of bounds query (base=0, size=100000'),
    )
    for bad_segments, scheme, expected_message in invalid_queries:
        with pytest.raises(ValueError) as excinfo:
            retrieve_contiguous(bad_segments, columns, reader, dtype, ne, nc,
                                scheme)
        assert expected_message in str(excinfo.value)

    def check_views(expected_arrays, queried_segments):
        # Every retrieved view must match the array it was built from.
        views = retrieve_contiguous(queried_segments, columns, reader, dtype,
                                    ne, nc, contiguity)
        for expected, view in zip(expected_arrays, views):
            assert np.allclose(expected, view)

    # insertion order
    check_views(originals, segments)

    # random order
    paired = list(zip(originals, segments))
    rng.shuffle(paired)
    originals, segments = zip(*paired)
    check_views(originals, segments)
def _fasta_sniffer(fh):
    """Report whether ``fh`` looks like FASTA (and not QUAL).

    Returns a ``(bool, dict)`` pair; the dict is always empty.
    """
    # Strategy:
    #   Give up if more than 5 leading blank/whitespace-only lines are found.
    #   Read up to 10 FASTA records. The file is a FASTA candidate only if at
    #   least one record is read and reading raises no FASTAFormatError.
    #   Then re-parse the file as QUAL, which has stricter requirements. If
    #   that *succeeds*, do not identify the file as FASTA: QUAL files can
    #   technically be read as FASTA (sequences aren't validated), but that
    #   is probably not what the user wanted, and a future QUAL sniffer
    #   should not overlap with this one.
    if _too_many_blanks(fh, 5):
        return False, {}

    num_records = 10

    found_record = False
    try:
        for _ in zip(range(num_records), _fasta_to_generator(fh)):
            found_record = True
    except FASTAFormatError:
        return False, {}

    if not found_record:
        return False, {}

    fh.seek(0)
    try:
        list(zip(range(num_records),
                 _parse_fasta_raw(fh, _parse_quality_scores, 'QUAL')))
    except FASTAFormatError:
        # Not parseable as QUAL, so it really is FASTA.
        return True, {}
    return False, {}
def test_fasta_to_sequence_collection_and_alignment(self):
    """Read FASTA (with and without QUAL) into collections and alignments."""
    test_cases = (self.empty, self.single,
                  self.sequence_collection_different_type)
    readers = ((SequenceCollection, _fasta_to_sequence_collection),
               (Alignment, _fasta_to_alignment))

    for constructor, reader_fn in readers:
        # see comment in test_fasta_to_generator_valid_files (above) for
        # testing strategy
        for exp_list, kwargs, fasta_fps, qual_fps in test_cases:
            exp = constructor(exp_list)

            for fasta_fp in fasta_fps:
                obs = reader_fn(fasta_fp, **kwargs)

                # TODO remove this custom equality testing code when
                # SequenceCollection has an equals method (part of #656).
                # We need this method to include IDs and description in the
                # comparison (not part of SequenceCollection.__eq__).
                self.assertEqual(len(obs), len(exp))
                for observed_seq, expected_seq in zip(obs, exp):
                    self.assertTrue(
                        observed_seq.equals(expected_seq, ignore=['quality']))

                for qual_fp in qual_fps:
                    obs = reader_fn(fasta_fp, qual=qual_fp, **kwargs)

                    # TODO remove this custom equality testing code when
                    # SequenceCollection has an equals method (part of
                    # #656). We need this method to include IDs and
                    # description in the comparison (not part of
                    # SequenceCollection.__eq__).
                    self.assertEqual(obs, exp)
                    for observed_seq, expected_seq in zip(obs, exp):
                        self.assertTrue(observed_seq.equals(expected_seq))
def __init__(self):
    """Load the name, index, sector and number tables from their files."""
    self.quads_index = []

    with open(self.names_file, 'r') as fh:
        self.names = [name.strip() for name in fh]

    # Flatten the whitespace-separated index file into one list of tokens.
    with open(self.quadsindex_file, 'r') as fh:
        indexes = [token.strip()
                   for row in fh
                   for token in row.split(' ') if token != '']

    # One chunk of 91 tokens per quadrant, in ``quads_order``.
    chunks = [indexes[start:start + 91]
              for start in range(0, len(indexes), 91)]
    self.lons_per_lat = dict(zip(self.quads_order, chunks))

    # For every quadrant store the running total that precedes each entry.
    self.lat_begins = {}
    for quad, counts in self.lons_per_lat.items():
        offsets = []
        running_total = 0
        for count in counts:
            offsets.append(running_total)
            running_total += int(count)
        self.lat_begins[quad] = offsets

    self.lons = {}
    self.fenums = {}
    for quad, sect_file in zip(self.quads_order, self.sect_files):
        with open(sect_file, 'r') as fh:
            sect = [int(v)
                    for line in fh
                    for v in line.strip().split(' ') if v != '']
        # Values alternate: 1st, 3rd, ... go to lons; 2nd, 4th, ... to fenums.
        self.lons[quad] = sect[0::2]
        self.fenums[quad] = sect[1::2]

    with open(self.numbers_file, 'rt') as csvfile:
        FE_csv = csv.reader(csvfile, delimiter=native_str(';'),
                            quotechar=native_str('#'),
                            skipinitialspace=True)
        # Rows with fewer than two fields are ignored.
        self.by_number = {int(row[0]): row[1]
                          for row in FE_csv if len(row) > 1}
def test_fasta_to_generator_valid_files(self):
    """Parse every valid FASTA file alone and paired with its QUAL files."""
    test_cases = (self.empty, self.single, self.multi,
                  self.odd_labels_different_type,
                  self.sequence_collection_different_type,
                  self.lowercase_seqs)

    # Strategy:
    #   First read each fasta file on its own and compare against the
    #   expected records with their quality scores stripped. Then read the
    #   same fasta file together with each of its quality files and compare
    #   against the expected records including quality scores. This covers
    #   both standalone fasta parsing and paired fasta/qual parsing.
    for exp, kwargs, fasta_fps, qual_fps in test_cases:
        for fasta_fp in fasta_fps:
            obs = list(_fasta_to_generator(fasta_fp, **kwargs))
            self.assertEqual(len(obs), len(exp))
            for observed_record, expected_record in zip(obs, exp):
                without_quality = expected_record.copy()
                del without_quality.positional_metadata['quality']
                self.assertEqual(observed_record, without_quality)

            for qual_fp in qual_fps:
                obs = list(_fasta_to_generator(fasta_fp, qual=qual_fp,
                                               **kwargs))
                self.assertEqual(len(obs), len(exp))
                for observed_record, expected_record in zip(obs, exp):
                    self.assertEqual(observed_record, expected_record)
def _compute_score_and_traceback_matrices(
        seq1, seq2, gap_open_penalty, gap_extend_penalty, substitution_matrix,
        new_alignment_score=-np.inf, init_matrices_f=_init_matrices_nw):
    """Fill and return the score and traceback matrices for two sequences.

    The matrices come from ``init_matrices_f``. Each cell ``[i, j]`` (``i``
    walks ``seq2``, ``j`` walks ``seq1``, both starting at 1) receives the
    score/direction chosen by ``_first_largest`` among: starting a new
    alignment, a horizontal gap, a match, and a vertical gap.

    Raises
    ------
    ValueError
        If a character pair cannot be looked up in ``substitution_matrix``.
    """
    # local aliases for the traceback codes
    aend = _traceback_encoding['alignment-end']
    match = _traceback_encoding['match']
    vgap = _traceback_encoding['vertical-gap']
    hgap = _traceback_encoding['horizontal-gap']

    new_alignment_score = (new_alignment_score, aend)

    score_matrix, traceback_matrix = init_matrices_f(
        seq1, seq2, gap_open_penalty, gap_extend_penalty)

    # seq2 runs along the first matrix axis (index i), seq1 along the
    # second (index j); index 0 on either axis is the initialised border.
    for i, c2 in enumerate(seq2, 1):
        for j, c1 in enumerate(seq1, 1):
            try:
                substitution_score = substitution_matrix[c1][c2]
            except KeyError:
                offending_chars = [
                    c for c in (c1, c2) if c not in substitution_matrix]
                raise ValueError(
                    "One of the sequences contains a character that is not "
                    "contained in the substitution matrix. Are you using "
                    "an appropriate substitution matrix for your sequence "
                    "type (e.g., a nucleotide substitution matrix does not "
                    "make sense for aligning protein sequences)? Does your "
                    "sequence contain invalid characters? The offending "
                    "character(s) is: %s." % ', '.join(offending_chars))

            diag_score = (score_matrix[i-1, j-1] + substitution_score, match)

            # Extending an existing vertical gap costs the extend penalty,
            # opening a fresh one costs the open penalty.
            if traceback_matrix[i-1, j] == vgap:
                up_penalty = gap_extend_penalty
            else:
                up_penalty = gap_open_penalty
            up_score = (score_matrix[i-1, j] - up_penalty, vgap)

            # Same rule for horizontal gaps, looking at the cell to the left.
            if traceback_matrix[i, j-1] == hgap:
                left_penalty = gap_extend_penalty
            else:
                left_penalty = gap_open_penalty
            left_score = (score_matrix[i, j-1] - left_penalty, hgap)

            chosen = _first_largest([new_alignment_score, left_score,
                                     diag_score, up_score])
            score_matrix[i, j], traceback_matrix[i, j] = chosen[0], chosen[1]

    return score_matrix, traceback_matrix
def _common_creation_steps(cls, md_template, obj_id):
    r"""Executes the common creation steps

    Queues, inside a single ``TRN`` block, the inserts into the template
    sample table and the ``*_columns`` table, the creation of the custom
    per-object table, and the insert of the metadata values into it; then
    executes the transaction.

    Parameters
    ----------
    md_template : DataFrame
        The metadata template file contents indexed by sample ids
    obj_id : int
        The id of the object being created
    """
    with TRN:
        cls._check_subclass()

        # Get some useful information from the metadata template
        sample_ids = md_template.index.tolist()
        headers = sorted(md_template.keys().tolist())

        # Insert values on template_sample table
        values = [[obj_id, s_id] for s_id in sample_ids]
        sql = ("INSERT INTO qiita.{0} ({1}, sample_id) "
               "VALUES (%s, %s)").format(cls._table, cls._id_column)
        TRN.add(sql, values, many=True)

        # Insert rows on *_columns table.
        # ``.loc`` selects the columns by label; the older ``.ix`` indexer
        # was removed in pandas 1.0.
        datatypes = get_datatypes(md_template.loc[:, headers])
        # psycopg2 requires a list of tuples, in which each tuple is a set
        # of values to use in the string formatting of the query. We have
        # all the values in different lists (but in the same order) so use
        # zip to create the list of tuples that psycopg2 requires.
        values = [[obj_id, h, d] for h, d in zip(headers, datatypes)]
        sql = ("INSERT INTO qiita.{0} ({1}, column_name, column_type) "
               "VALUES (%s, %s, %s)").format(cls._column_table,
                                             cls._id_column)
        TRN.add(sql, values, many=True)

        # Create table with custom columns
        table_name = cls._table_name(obj_id)
        column_datatype = ["%s %s" % (col, dtype)
                           for col, dtype in zip(headers, datatypes)]
        sql = ("CREATE TABLE qiita.{0} ( "
               "sample_id varchar NOT NULL, {1}, "
               "CONSTRAINT fk_{0} FOREIGN KEY (sample_id) "
               "REFERENCES qiita.study_sample (sample_id) "
               "ON UPDATE CASCADE )").format(table_name,
                                             ', '.join(column_datatype))
        TRN.add(sql)

        # Insert values on custom table: one row per sample, sample id first
        values = as_python_types(md_template, headers)
        values.insert(0, sample_ids)
        values = [list(v) for v in zip(*values)]
        sql = ("INSERT INTO qiita.{0} (sample_id, {1}) "
               "VALUES (%s, {2})").format(
                   table_name, ", ".join(headers),
                   ', '.join(["%s"] * len(headers)))
        TRN.add(sql, values, many=True)

        # Execute all the steps
        TRN.execute()
def _traceback(traceback_matrix, score_matrix, aln1, aln2, start_row,
               start_col):
    """Walk the traceback matrix from a start cell to an alignment end.

    Returns the aligned sequences built from ``aln1`` and ``aln2`` (each
    constructed with the alignment's ``dtype``), the score at the start
    cell, and the column and row at which the walk stopped.

    Raises
    ------
    ValueError
        If a cell holds a value that is not a known traceback code.
    """
    # local aliases for the traceback codes
    aend = _traceback_encoding["alignment-end"]
    match = _traceback_encoding["match"]
    vgap = _traceback_encoding["vertical-gap"]
    hgap = _traceback_encoding["horizontal-gap"]
    gap_character = aln1.dtype.default_gap_char

    # One growing list of characters per input sequence; characters are
    # collected back-to-front and reversed at the end.
    aligned_seqs1 = [[] for _ in range(aln1.shape.sequence)]
    aligned_seqs2 = [[] for _ in range(aln2.shape.sequence)]

    def take_characters(targets, source, index):
        for chars, input_seq in zip(targets, source):
            chars.append(str(input_seq[index]))

    def take_gaps(targets):
        for chars in targets:
            chars.append(gap_character)

    current_row = start_row
    current_col = start_col
    best_score = score_matrix[current_row, current_col]

    while True:
        current_value = traceback_matrix[current_row, current_col]

        if current_value == match:
            take_characters(aligned_seqs1, aln1, current_col - 1)
            take_characters(aligned_seqs2, aln2, current_row - 1)
            current_row -= 1
            current_col -= 1
        elif current_value == vgap:
            take_gaps(aligned_seqs1)
            take_characters(aligned_seqs2, aln2, current_row - 1)
            current_row -= 1
        elif current_value == hgap:
            take_characters(aligned_seqs1, aln1, current_col - 1)
            take_gaps(aligned_seqs2)
            current_col -= 1
        elif current_value == aend:
            break
        else:
            raise ValueError(
                "Invalid value in traceback matrix: %s" % current_value)

    aligned_seqs1 = [aln1.dtype("".join(reversed(chars)))
                     for chars in aligned_seqs1]
    aligned_seqs2 = [aln2.dtype("".join(reversed(chars)))
                     for chars in aligned_seqs2]

    return aligned_seqs1, aligned_seqs2, best_score, current_col, current_row
def _write_csv(input_files, results, csv_path=None):
    """
    Writes out results as a csv.

    The metadata dictionaries in ``results`` are modified in place: a scan
    date and (if missing) the input filename are added, the nested 'other'
    entry is flattened and 'outputfile' is split into separate columns.

    :param input_files: List of filenames for each respective metadata.
    :param results: List of metadata dictionaries.
    :param csv_path: Path to write out csv file, defaults to stdout.

    :raises IOError: If csv could not be written out.
    """
    scan_date = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')

    # Add/Tweak metadata.
    for inputfilename, metadata in zip(input_files, results):
        # Add scan date.
        metadata[u'scan_date'] = scan_date
        if u'inputfilename' not in metadata:
            metadata[u'inputfilename'] = inputfilename

        # Flatten 'other' entry so nested values get their own columns,
        # are more readable, and easier to individually analyze.
        #
        # Example:
        #   {'other': {"unique_entry": "value", "unique_key": "value2"}}
        # Results in columns: other.unique_entry, other.unique_key
        # (the original 'other' entry is removed).
        if u'other' in metadata:
            for sub_key, sub_value in metadata[u'other'].items():
                metadata[u'other.{}'.format(convert_to_unicode(sub_key))] = sub_value
            del metadata[u'other']

        # Split outputfile into multiple fields.
        if u'outputfile' in metadata:
            value = list(zip(*metadata[u'outputfile']))
            metadata[u'outputfile.name'] = value[0]
            metadata[u'outputfile.description'] = value[1]
            metadata[u'outputfile.md5'] = value[2]
            del metadata[u'outputfile']

    def _column_sort_key(name):
        # Standard columns come first, ordered by their *numeric* position
        # in _STD_CSV_COLUMNS. Comparing the positions as strings would put
        # "10" before "2". All remaining columns follow alphabetically.
        if name in _STD_CSV_COLUMNS:
            return (0, _STD_CSV_COLUMNS.index(name))
        return (1, str(name))

    # Sort columns, but with the standard columns showing up first.
    column_names = set(itertools.chain(*(metadata.keys() for metadata in results)))
    column_names = sorted(column_names, key=_column_sort_key)

    # Reformat metadata and write to CSV
    if csv_path is None:
        csvfile = sys.stdout
    else:
        csvfile = open(csv_path, 'wb' if six.PY2 else 'w')
    try:
        dw = csv.DictWriter(csvfile, fieldnames=column_names, lineterminator='\n')
        dw.writeheader()
        for metadata in results:
            dw.writerow({k: _format_metadata_value(v) for k, v in metadata.items()})
    finally:
        # Only close what was opened here; never close stdout.
        if csv_path:
            csvfile.close()
def hinton(inarray, max_value=None, use_default_ticks=True):
    """Plot Hinton diagram for visualizing the values of a 2D array.

    Plot representation of an array with positive and negative values
    represented by white and black squares, respectively. The size of each
    square represents the magnitude of each value.

    Unlike the hinton demo in the matplotlib gallery [1]_, this implementation
    uses a RegularPolyCollection to draw squares, which is much more efficient
    than drawing individual Rectangles.

    .. note::
        This function inverts the y-axis to match the origin for arrays.

    .. [1] http://matplotlib.sourceforge.net/examples/api/hinton_demo.html

    Parameters
    ----------
    inarray : array
        Array to plot.
    max_value : float
        Any *absolute* value larger than `max_value` will be represented by a
        unit square. If None, the largest absolute value in `inarray` rounded
        up to the next power of two is used.
    use_default_ticks: boolean
        If True, an `IndexLocator` is installed on both axes. Pass False to
        skip tick generation here and set the ticks outside this function.
    """
    ax = plt.gca()
    # ``set_axis_bgcolor`` was deprecated in matplotlib 2.0 and later removed
    # in favour of ``set_facecolor``; fall back to the old name only when the
    # new one is not available.
    if hasattr(ax, 'set_facecolor'):
        ax.set_facecolor('gray')
    else:
        ax.set_axis_bgcolor('gray')

    # make sure we're working with a numpy array, not a numpy matrix
    inarray = np.asarray(inarray)
    height, width = inarray.shape
    if max_value is None:
        max_value = 2**np.ceil(np.log(np.max(np.abs(inarray)))/np.log(2))
    values = np.clip(inarray/max_value, -1, 1)
    rows, cols = np.mgrid[:height, :width]

    pos = np.where(values > 0)
    neg = np.where(values < 0)
    for idx, color in zip([pos, neg], ['white', 'black']):
        if len(idx[0]) > 0:
            xy = list(zip(cols[idx], rows[idx]))
            circle_areas = np.pi / 2 * np.abs(values[idx])
            squares = SquareCollection(sizes=circle_areas,
                                       offsets=xy, transOffset=ax.transData,
                                       facecolor=color, edgecolor=color)
            ax.add_collection(squares, autolim=True)

    ax.axis('scaled')
    # set data limits instead of using xlim, ylim.
    ax.set_xlim(-0.5, width-0.5)
    ax.set_ylim(height-0.5, -0.5)

    if use_default_ticks:
        ax.xaxis.set_major_locator(IndexLocator())
        ax.yaxis.set_major_locator(IndexLocator())
def test_getWaveformNSCL(self):
    """
    Tests getWaveformNSCL method.
    """
    client = Client(host="137.227.224.97", port=2061)
    # now - 5 hours
    t = UTCDateTime() - 5 * 60 * 60
    duration_long = 3600.0
    duration = 1.0
    components = ["1", "2", "Z"]

    def check_stream(stream, expected_duration, expected_npts):
        # Three traces are expected, one per component, all from IU.ANMO.00.
        self.assertTrue(len(stream) == 3)
        for tr, component in zip(stream, components):
            stats = tr.stats
            self.assertTrue(stats.station == "ANMO")
            self.assertTrue(stats.network == "IU")
            self.assertTrue(stats.location == "00")
            self.assertTrue(stats.channel == "BH" + component)
            self.assertTrue(
                stats.endtime - stats.starttime == expected_duration)
            # if the following fails this is likely due to a change at the
            # requested station and simply has to be adapted
            self.assertTrue(stats.sampling_rate == 20.0)
            self.assertTrue(len(tr) == expected_npts)

    # try one longer request to see if fetching multiple blocks works
    st = client.getWaveformNSCL("IUANMO BH.00", t, duration_long)
    # merge to avoid failing tests simply due to gaps
    st.merge()
    st.sort()
    check_stream(st, duration_long, 72001)

    # now use shorter piece, this is faster and less error prone (gaps etc)
    st = client.getWaveformNSCL("IUANMO BH.00", t, duration)
    st.sort()
    check_stream(st, duration, 21)

    # try a series of regex patterns that should return the same data
    st = client.getWaveformNSCL("IUANMO BH", t, duration)
    patterns = ["IUANMO BH...",
                "IUANMO BH.*",
                "IUANMO BH[Z12].*",
                "IUANMO BH[Z12]..",
                "IUANMO B.*",
                "..ANMO B.*"]
    for pattern in patterns:
        st2 = client.getWaveformNSCL(pattern, t, duration)
        self.assertTrue(st == st2)
def _to_values(self, collection): """ Regroup values in tuples or dicts for each "instance". Exemple: Given this result from redis: ['id1', 'name1', 'id2', 'name2'] tuples: [('id1', 'name1'), ('id2', 'name2')] dicts: [{'id': 'id1', 'name': 'name1'}, {'id': 'id2', 'name': 'name2'}] """ result = zip(*([iter(collection)] * len(self._values['fields']['names']))) if self._values['mode'] == 'dicts': result = (dict(zip(self._values['fields']['names'], a_result)) for a_result in result) return result
def _button_press_callback(self, event):
    """
    What actions to perform when a mouse button is clicked

    Clicks outside ``self.modelax`` are ignored. Outside drawing mode a
    button-1 click (when polygons exist) looks up the clicked polygon
    vertex and highlights that polygon. In drawing mode button 1 adds a
    vertex to the polygon being drawn and button 3 finishes it, provided it
    has at least three vertices.
    """
    if event.inaxes != self.modelax:
        return
    if event.button == 1 and not self._drawing and self.polygons:
        self._lastevent = event
        # Reset every polygon/line to non-animated and recolour the lines
        # before redrawing the whole canvas.
        for line, poly in zip(self.lines, self.polygons):
            poly.set_animated(False)
            line.set_animated(False)
            line.set_color([0, 0, 0, 0])
        self.canvas.draw()
        # Find out if a click happened on a vertice
        # and which vertice of which polygon
        self._ipoly, self._ivert = self._get_polygon_vertice_id(event)
        if self._ipoly is not None:
            # Sync the slider with the selected polygon's density.
            self.density_slider.set_val(self.densities[self._ipoly])
            self.polygons[self._ipoly].set_animated(True)
            self.lines[self._ipoly].set_animated(True)
            self.lines[self._ipoly].set_color([0, 1, 0, 0])
            # Redraw, capture the axes region as the background, then draw
            # the selected artists on top and blit the result.
            self.canvas.draw()
            self.background = self.canvas.copy_from_bbox(self.modelax.bbox)
            self.modelax.draw_artist(self.polygons[self._ipoly])
            self.modelax.draw_artist(self.lines[self._ipoly])
            self.canvas.blit(self.modelax.bbox)
    elif self._drawing:
        if event.button == 1:
            # Add the clicked point and refresh only the in-progress plot.
            self._xy.append([event.xdata, event.ydata])
            self._drawing_plot.set_data(list(zip(*self._xy)))
            self.canvas.restore_region(self.background)
            self.modelax.draw_artist(self._drawing_plot)
            self.canvas.blit(self.modelax.bbox)
        elif event.button == 3:
            # Finish the polygon; fewer than three vertices are ignored.
            if len(self._xy) >= 3:
                density = self.density_slider.val
                poly, line = self._make_polygon(self._xy, density)
                self.polygons.append(poly)
                self.lines.append(line)
                self.densities.append(density)
                self.modelax.add_patch(poly)
                self.modelax.add_line(line)
                # Leave drawing mode and drop the temporary plot.
                self._drawing_plot.remove()
                self._drawing_plot = None
                self._xy = None
                self._drawing = False
                # The new polygon (last in the list) becomes the selection.
                self._ipoly = len(self.polygons) - 1
                self.lines[self._ipoly].set_color([0, 1, 0, 0])
                self.dataax.set_title(self.instructions)
                self.canvas.draw()
                self._update_data()
                self._update_data_plot()
def test_roundtrip(jagged_raw, dataset, columns):
    """Append arrays to a store, then read them back in several orders."""
    jagged_raw, path = jagged_raw
    jagged_raw = partial(jagged_raw, path=path)
    rng, originals, ncol = dataset

    # Write
    keys = []
    with jagged_raw() as jr:
        assert jr.dtype is None
        assert jr.shape is None
        total = 0
        for original in originals:
            keys.append(jr.append(original))
            assert jr.is_writing
            total += len(original)
        assert len(jr) == total
        assert jr.dtype == originals[0].dtype
        assert jr.shape == (total, ncol)

    # Read
    def test_read(originals, keys):
        if columns is not None:
            originals = [o[:, columns] for o in originals]

        # test read, one by one
        with jagged_raw() as jr:
            for expected, key in zip(originals, keys):
                fetched = jr.get([key], columns=columns)[0]
                assert np.allclose(expected, fetched)

        # test read, in a batch
        with jagged_raw() as jr:
            batch = jr.get(keys, columns=columns)
            for expected, fetched in zip(originals, batch):
                assert np.allclose(expected, fetched)

        # NOTE: a "read all" check (jr.get(columns=columns) without keys)
        # is disabled here.

    # read in insertion order
    test_read(originals, keys)

    # read in random order
    shuffled_pairs = list(zip(originals, keys))
    rng.shuffle(shuffled_pairs)
    originals, keys = zip(*shuffled_pairs)
    test_read(originals, keys)
def _write_csv(input_files, results, csv_path, base64_outputfiles=False):
    """
    Writes out results as a csv.

    The metadata dictionaries in ``results`` are modified in place: a scan
    date and (if missing) the input filename are added, the nested 'other'
    entry is flattened and 'outputfile' is split into separate columns.

    :param input_files: List of filenames for each respective metadata.
    :param results: List of metadata dictionaries.
    :param csv_path: Path to write out csv file.
    :param base64_outputfiles: Whether to include base64 outputfiles.

    :raises IOError: If csv could not be written out.
    """
    scan_date = time.ctime()

    # Add/Tweak metadata.
    for inputfilename, metadata in zip(input_files, results):
        # Add scan date.
        metadata['scan_date'] = scan_date
        if 'inputfilename' not in metadata:
            metadata['inputfilename'] = inputfilename

        # Flatten 'other' entry so nested values get their own columns,
        # are more readable, and easier to individually analyze.
        #
        # Example:
        #   {'other': {"unique_entry": "value", "unique_key": "value2"}}
        # Results in columns: other.unique_entry, other.unique_key
        # (the original 'other' entry is removed).
        if 'other' in metadata:
            for sub_key, sub_value in metadata['other'].items():
                metadata['other.{}'.format(convert_to_unicode(sub_key))] = sub_value
            del metadata['other']

        # Split outputfile into multiple fields.
        if 'outputfile' in metadata:
            value = list(zip(*metadata['outputfile']))
            metadata['outputfile.name'] = value[0]
            metadata['outputfile.description'] = value[1]
            metadata['outputfile.md5'] = value[2]
            # The base64 payload is an optional fourth field.
            if len(value) > 3 and base64_outputfiles:
                metadata['outputfile.base64'] = value[3]
            del metadata['outputfile']

    def _column_sort_key(name):
        # Standard columns come first, ordered by their *numeric* position
        # in _STD_CSV_COLUMNS. Comparing the positions as strings would put
        # "10" before "2". All remaining columns follow alphabetically.
        if name in _STD_CSV_COLUMNS:
            return (0, _STD_CSV_COLUMNS.index(name))
        return (1, name)

    # Sort columns, but with the standard columns showing up first.
    column_names = set(itertools.chain(*(metadata.keys() for metadata in results)))
    column_names = sorted(column_names, key=_column_sort_key)

    # Reformat metadata and write to CSV
    with open(csv_path, 'wb' if sys.version_info.major < 3 else 'w') as csvfile:
        dw = csv.DictWriter(csvfile, fieldnames=column_names, lineterminator='\n')
        dw.writeheader()
        for metadata in results:
            dw.writerow({k: _format_metadata_value(v) for k, v in metadata.items()})
def test_count_multi_by_record(self):
    """Count two FASTA files per record and check every stored profile."""
    size = 8
    counts_by_record_left = [utils.counts(record, size)
                             for record in utils.SEQUENCES_LEFT]
    counts_by_record_right = [utils.counts(record, size)
                              for record in utils.SEQUENCES_RIGHT]
    # Records are simply named "0", "1", ...
    names_left = [str(i) for i in range(len(counts_by_record_left))]
    names_right = [str(i) for i in range(len(counts_by_record_right))]

    filename = self.empty()
    with open(self.fasta(utils.SEQUENCES_LEFT,
                         names=names_left)) as handle_left, \
            open(self.fasta(utils.SEQUENCES_RIGHT,
                            names=names_right)) as handle_right, \
            utils.open_profile(filename, 'w') as profile_handle:
        kmer.count([handle_left, handle_right], profile_handle, size,
                   names=['a', 'b'], by_record=True)

    # Profiles are expected under "<file name>_<record name>".
    expectations = (('a_', names_left, counts_by_record_left),
                    ('b_', names_right, counts_by_record_right))
    for prefix, record_names, record_counts in expectations:
        for name, counts in zip(record_names, record_counts):
            utils.test_profile_file(filename, counts, size,
                                    name=prefix + name)
def test_read(originals, keys):
    """Check that the stored arrays read back equal to ``originals``."""
    if columns is not None:
        originals = [o[:, columns] for o in originals]

    # test read, one by one
    with jagged_raw() as jr:
        for expected, key in zip(originals, keys):
            fetched = jr.get([key], columns=columns)[0]
            assert np.allclose(expected, fetched)

    # test read, in a batch
    with jagged_raw() as jr:
        batch = jr.get(keys, columns=columns)
        for expected, fetched in zip(originals, batch):
            assert np.allclose(expected, fetched)
def __eq__(self, other): r"""The equality operator. Parameters ---------- other : `SequenceCollection` The `SequenceCollection` to test for equality against. Returns ------- bool Indicates whether `self` and `other` are equal. Notes ----- `SequenceCollection` objects are equal if they are the same type, contain the same number of sequences, and if each of the `skbio.core.sequence.BiologicalSequence` objects, in order, are equal. .. shownumpydoc """ if self.__class__ != other.__class__: return False elif len(self) != len(other): return False else: for self_seq, other_seq in zip(self, other): if self_seq != other_seq: return False return True
def count(input_handles, output_handle, size, names=None, by_record=False):
    """
    Make k-mer profiles from FASTA files.

    :arg input_handles: Open readable FASTA file handles.
    :type input_handles: list(file-like object)
    :arg output_handle: Open writeable k-mer profile file handle.
    :type output_handle: h5py.File
    :arg size: Size of k.
    :type size: int
    :arg names: Optional list of names for the created k-mer profiles (must
      have the same length as `input_handles`). If not provided, names are
      derived from the input handles.
    :type names: list(str)
    :arg bool by_record: If `True`, make a k-mer profile per FASTA record
      instead of a k-mer profile per FASTA file. The name is passed on as
      prefix only if more than one FASTA file is given.
    """
    if not names:
        names = [_name_from_handle(handle) for handle in input_handles]

    if len(names) != len(input_handles):
        raise ValueError(NAMES_COUNT_ERROR)

    for input_handle, name in zip(input_handles, names):
        if not by_record:
            profiles = [klib.Profile.from_fasta(input_handle, size, name=name)]
        else:
            # A prefix is only needed to tell several input files apart.
            prefix = None if len(input_handles) <= 1 else name
            profiles = klib.Profile.from_fasta_by_record(
                input_handle, size, prefix=prefix)

        for profile in profiles:
            profile.save(output_handle)
def stylable_result_headers(cl):
    """
    Wrap the existing result_headers() iterator: every header gets a
    `col-FIELD_NAME` CSS class and a `field_name` entry for JavaScript.

    cl = The django ChangeList object
    """
    for field_name, header in zip(cl.list_display, result_headers(cl)):
        header['field_name'] = field_name  # For JavaScript

        class_attrib = header.get('class_attrib')
        if not class_attrib:
            class_attrib = ' class="col-%s"' % field_name
        else:
            # Remove any sorting marker for mptt tables, because they are
            # not sortable.
            if hasattr(cl.model, '_mptt_meta'):
                for marker in ('sortable', 'sorted', 'ascending'):
                    class_attrib = class_attrib.replace(marker, '')
            class_attrib = class_attrib.replace(
                'class="', 'class="col-%s ' % field_name)
        header['class_attrib'] = mark_safe(class_attrib)

        # Django 1.3 template compatibility.
        if 'url_primary' in header and 'url' not in header:
            header['url'] = header['url_primary']

        yield header
def plot_mags(result, fname=None, title=None, xlim=None, ylim=None,
              figsize=None):
    """Plot inverted magnitude (Mw) against catalog magnitude (Mcat).

    Only events in ``result['events']`` that have both an 'Mcat' and an 'Mw'
    value are used. Returns ``None`` without creating a figure when no such
    event exists. With more than two points a linear fit is drawn, with more
    than three points an additional fit with ``m=1``.

    :param result: dictionary with an 'events' mapping of event results
    :param fname, title: passed on to ``_savefig``
    :param xlim: optional x limits; also the range over which fits are drawn
    :param ylim: optional y limits
    :param figsize: passed to ``plt.figure``
    """
    temp = [(r['Mcat'], r['Mw']) for r in result['events'].values()
            if r.get('Mcat') is not None and r.get('Mw') is not None]
    # Bail out *before* creating the figure, otherwise an empty figure stays
    # registered with pyplot and is never saved or closed.
    if len(temp) == 0:
        return
    fig = plt.figure(figsize=figsize)
    ax = fig.add_subplot(111)
    Mcat, Mw = zip(*temp)
    ax.plot(Mcat, Mw, 'ok', ms=MS)
    if xlim is not None:
        mmin, mmax = xlim
    else:
        mmin, mmax = np.min(Mcat), np.max(Mcat)
    m = np.linspace(mmin, mmax, 100)

    if len(Mw) > 2:
        a, b = linear_fit(Mw, Mcat)
        ax.plot(m, a * m + b, '-m', label='%.2fM %+.2f' % (a, b))
    if len(Mw) > 3:
        _, b2 = linear_fit(Mw, Mcat, m=1)
        ax.plot(m, m + b2, '--m', label='M %+.2f' % (b2,))
    if len(Mw) > 2:
        ax.legend(loc='lower right')
    if xlim is not None:
        ax.set_xlim(xlim)
    # ``ylim`` was accepted but never applied before.
    if ylim is not None:
        ax.set_ylim(ylim)
    ax.set_xlabel('M from catalog')
    ax.set_ylabel('Mw from inversion')
    _savefig(fig, fname=fname, title=title)
def coerce_zadd_args(*args, **kwargs):
    """
    Build the flat ``score, value, score, value, ...`` list for a zadd call.

    Positional arguments are taken as alternating scores and values; each
    remaining named argument contributes its value as score and its name as
    value.
    If the `values_callback` named argument is given, it is called with all
    "values" (as *args) and its result replaces them.
    """
    values_callback = kwargs.pop('values_callback', None)

    if args and len(args) % 2 != 0:
        raise RedisError("ZADD requires an equal number of "
                         "values and scores")

    flat = list(args)
    for name, score in iteritems(kwargs):
        flat.extend((score, name))

    scores = flat[0::2]
    values = flat[1::2]
    if values_callback:
        values = values_callback(*values)

    # Interleave scores and (possibly transformed) values again.
    return [item for pair in zip(scores, values) for item in pair]
def test_data(self):
    """``data`` matches the redundant form and cannot be assigned."""
    for dm, expected in zip(self.dms, self.dm_redundant_forms):
        self.assertTrue(np.array_equal(dm.data, expected))

    # Assigning to the attribute must be rejected.
    with self.assertRaises(AttributeError):
        setattr(self.dm_3x3, 'data', 'foo')
def wrapped_reader(fp, mode='U', mutate_fh=False, **kwargs):
    """Open ``fp`` plus any extra file arguments and yield from ``reader``.

    Every name in ``file_args`` that is present in ``kwargs`` with a
    non-None value is opened together with ``fp`` and replaced by its open
    handle before ``reader`` is called; names missing from ``kwargs`` are
    set to None.

    Raises
    ------
    InvalidRegistrationError
        If calling ``reader`` raises, or if it does not return a generator.
    """
    file_keys = []
    files = [fp]
    for file_arg in file_args:
        if file_arg in kwargs:
            if kwargs[file_arg] is not None:
                file_keys.append(file_arg)
                files.append(kwargs[file_arg])
        else:
            kwargs[file_arg] = None

    with open_files(files, mode) as fhs:
        try:
            # fhs[0] belongs to ``fp``; the rest line up with ``file_keys``.
            for key, fh in zip(file_keys, fhs[1:]):
                kwargs[key] = fh

            generator = reader(fhs[0], **kwargs)
            if not isinstance(generator, types.GeneratorType):
                # Raise an exception to be handled next line,
                # because although reader executed without error,
                # it is not a generator.
                raise Exception()
        # If an exception is thrown at this point, it cannot
        # be a generator. If there was a `yield` statement, then
        # Python would have returned a generator regardless of the
        # content. This does not preclude the generator from
        # throwing exceptions.
        except Exception:
            raise InvalidRegistrationError("'%s' is not a "
                                           "generator." % reader.__name__)

        # A ``for`` loop ends cleanly when ``generator`` is exhausted.
        # Letting ``next()`` raise StopIteration inside a generator body
        # becomes a RuntimeError under PEP 479 (Python 3.7+).
        for item in generator:
            yield item
def test_valid_files(self):
    """Each valid file parses into the expected sequence objects."""
    metadata_keys = ('id', 'machine_name', 'run_number', 'lane_number',
                     'tile_number', 'x', 'y', 'index', 'read_number')

    for valid, kwargs, components in self.valid_files:
        for kwarg in kwargs:
            _drop_kwargs(kwarg, 'seq_num')
            constructor = kwarg.get('constructor', Sequence)

            expected = []
            for c in components:
                metadata = {key: c[key] for key in metadata_keys}
                quality = np.array(c['quality'], dtype=np.uint8)
                expected.append(
                    constructor(c['sequence'], metadata=metadata,
                                positional_metadata={'quality': quality}))

            observed = list(_qseq_to_generator(valid, **kwarg))
            self.assertEqual(len(expected), len(observed))
            for obs_seq, exp_seq in zip(observed, expected):
                self.assertEqual(obs_seq, exp_seq)
def test_fasta_to_sequence_collection_and_alignment(self):
    """Read FASTA, with and without QUAL, into both container types."""
    scenarios = (self.empty, self.single,
                 self.sequence_collection_different_type,
                 self.lowercase_seqs)
    readers = ((SequenceCollection, _fasta_to_sequence_collection),
               (Alignment, _fasta_to_alignment))

    for constructor, reader_fn in readers:
        # see comment in test_fasta_to_generator_valid_files (above) for
        # testing strategy
        for exp_list, kwargs, fasta_fps, qual_fps in scenarios:
            expected = constructor(exp_list)

            for fasta_fp in fasta_fps:
                observed = reader_fn(fasta_fp, **kwargs)
                self.assertEqual(len(observed), len(expected))

                # Without a QUAL file, compare against copies of the
                # expected sequences with the 'quality' column removed.
                for obs_seq, exp_seq in zip(observed, expected):
                    stripped = exp_seq.copy()
                    del stripped.positional_metadata['quality']
                    self.assertEqual(obs_seq, stripped)

                for qual_fp in qual_fps:
                    observed = reader_fn(fasta_fp, qual=qual_fp, **kwargs)
                    self.assertEqual(observed, expected)
def cat(input_handles, output_handle, names=None, prefixes=None):
    """
    Copy k-mer profiles from several input files into one output file.

    :arg input_handles: Open readable k-mer profile file handles.
    :type input_handles: list(file-like object)
    :arg output_handle: Open writeable k-mer profile file handle.
    :type output_handle: h5py.File
    :arg names: Optional list of names of the k-mer profiles to consider.
      If not provided, all profiles in all input files are considered.
    :type names: list(str)
    :arg prefixes: Optional list of prefixes to use for the saved k-mer
      profile names, one per input file. If not provided, profile names
      are assumed to be disjoint and no prefix is used.
    :type prefixes: list(str)

    :raises ValueError: If the number of prefixes differs from the number
      of input handles.
    """
    if not prefixes:
        prefixes = [''] * len(input_handles)
    if len(prefixes) != len(input_handles):
        raise ValueError(PREFIX_COUNT_ERROR)

    for handle, prefix in zip(input_handles, prefixes):
        selected = names or sorted(handle['profiles'])
        for name in selected:
            try:
                profile = klib.Profile.from_file(handle, name=name)
            except KeyError:
                # In this specific case, we ignore non-existing profiles,
                # since the user may have specified them for selecting from
                # one of the other input files.
                continue
            profile.save(output_handle, name=prefix + name)
def to_file(self, out_f, delimiter='\t'):
    """Save the dissimilarity matrix to file in delimited text format.

    The first line is the formatted IDs; each following line is an ID,
    the delimiter, and that row's values joined by the delimiter.

    Parameters
    ----------
    out_f : file-like object or filename
        File-like object to write serialized data to, or name of file. If
        it's a file-like object, it must have a ``write`` method, and it
        won't be closed. Else, it is opened and closed after writing.
    delimiter : str, optional
        Delimiter used to separate elements in output format.

    See Also
    --------
    from_file

    """
    # Bind the opened handle to its own name instead of shadowing ``out_f``.
    with open_file(out_f, 'w') as fh:
        fh.write(self._format_ids(delimiter))
        fh.write('\n')

        for id_, vals in zip(self.ids, self.data):
            fh.write(id_)
            fh.write(delimiter)
            # ``np.str`` was only an alias of the builtin ``str`` and has
            # been removed from NumPy; the builtin works on every version.
            fh.write(delimiter.join(np.asarray(vals, dtype=str)))
            fh.write('\n')
def rename(self, target):
    """Return renamed contig line.

    The patterns in ``self.regex`` are tried in order, each paired with
    the matching entry of ``self.soterm``. The first pattern that matches
    decides the result through its ``number``, ``roman`` or ``letter``
    group, checked in that order.

    Raises Exception if the matching pattern provides none of these
    groups, and NoMatchException if no pattern matches.
    """
    for pattern, so_term in zip(self.regex, self.soterm):
        found = pattern.search(target)
        if found is None:
            continue

        groups = found.groupdict()
        number = groups.get('number', None)
        roman_num = groups.get('roman', None)
        letter = groups.get('letter', None)

        if number is not None:
            contig = self._format_number(number)
        elif roman_num is not None:
            # A "roman" group holding digits is converted to a numeral.
            contig = (roman.roman_from_int(int(roman_num))
                      if roman_num.isdigit() else roman_num)
        elif letter is not None:
            contig = letter.upper()
        else:
            raise Exception("Regex {} doesn't contain a number, letter, "
                            "or roman numeral field.".format(pattern.pattern))

        return "{}_{}{}".format(self.abbrev, so_term, contig)

    raise NoMatchException("Doesn't match any regular expression: "
                           "{}".format(target))
def _setup_resources(self, requests, resources, force_initialize,
                     enable_debug, skip_init):
    """Prepare the resources for work.

    Walks over the resources paired with their requests and calls
    ``setup_resource`` on each one. The resources are yielded one at a
    time instead of returned as a list, so if one raises during setup the
    caller still knows which ones were already set up.

    Args:
        requests (tuple): list of the ResourceRequest.
        resources (list): list of the resources instances.
        force_initialize (bool): forwarded to ``setup_resource``.
        enable_debug (bool): True to call ``enable_debug`` on each
            resource before setting it up.
        skip_init (bool): forwarded to ``setup_resource``.

    Yields:
        tuple. pairs of (request name, resource).
    """
    self.logger.debug("Setting up the locked resources")

    for index, resource in enumerate(resources):
        if index >= len(requests):
            # Same stopping point as pairing the two sequences with zip.
            break

        if enable_debug:
            resource.enable_debug()

        resource.setup_resource(skip_init=skip_init,
                                force_initialize=force_initialize)
        yield (requests[index].name, resource)
def test_transpose(self):
    """``T`` and ``transpose()`` both give the expected transpose."""
    for matrix, expected in zip(self.dms, self.dm_transposes):
        self.assertEqual(matrix.T, expected)
        self.assertEqual(matrix.transpose(), expected)

        # We should get a reference to a different object back, even if
        # the transpose is the same as the original.
        self.assertTrue(matrix.transpose() is not matrix)
def smooth(input_handle_left, input_handle_right, output_handle_left,
           output_handle_right, names_left=None, names_right=None,
           summary='min', custom_summary=None, threshold=0):
    """
    Smooth two profiles by collapsing sub-profiles.

    Profiles from the left and right files are paired by position in the
    given name lists (or in the sorted profile names when no list is
    given) and processed pairwise.

    :arg h5py.File input_handle_left, input_handle_right: Open readable
      k-mer profile handle.
    :arg h5py.File output_handle_left, output_handle_right: Open writeable
      k-mer profile file handle.
    :arg list(str) names_left, names_right: Optional list of names of the
      k-mer profiles to consider. If not provided, all profiles in the
      file are considered.
    :arg str summary: Name of the summary function.
    :arg str custom_summary: Custom summary function.
    :arg int threshold: Threshold for the summary function.

    :raises ValueError: If the name lists differ in length, or if two
      paired profiles differ in length.
    """
    left_names = names_left or sorted(input_handle_left['profiles'])
    right_names = names_right or sorted(input_handle_right['profiles'])
    if len(left_names) != len(right_names):
        raise ValueError(PAIRED_NAMES_COUNT_ERROR)

    if not custom_summary:
        summary_function = metrics.summary[summary]
    elif re.match(_PYTHON_IMPORTABLE, custom_summary):
        # Importable definition, e.g. `package.module.summary_function`.
        module, name = custom_summary.rsplit('.', 1)
        summary_function = getattr(importlib.import_module(module), name)
    else:
        # Expression over `values`, e.g. `np.max(values)`. The `numpy`
        # package is available as `np`.
        # NOTE(review): this evaluates caller-supplied text with `eval`;
        # confirm `custom_summary` never comes from an untrusted source.
        summary_function = eval('lambda values: ' + custom_summary,
                                {'np': np})

    dist = kdistlib.ProfileDistance(summary=summary_function,
                                    threshold=threshold)

    for left_name, right_name in zip(left_names, right_names):
        left = klib.Profile.from_file(input_handle_left, name=left_name)
        right = klib.Profile.from_file(input_handle_right, name=right_name)

        if left.length != right.length:
            raise ValueError(LENGTH_ERROR)

        dist.dynamic_smooth(left, right)
        left.save(output_handle_left)
        right.save(output_handle_right)
def test_results_of_tutorial(self):
    """Test against publication of Sens-Schoenfelder and Wegler (2006)

    Runs the tutorial configuration and checks that the resulting ``g0``,
    ``b`` and ``M0`` values agree with the reference values below to
    within about half an order of magnitude (log10 difference).
    """
    plot = PLOT
    freq = [0.1875, 0.375, 0.75, 1.5, 3.0, 6.0, 12.0, 24.0]  # page 1365
    g0 = [2e-6, 2e-6, 1e-6, 1e-6, 1e-6, 1e-6, 1.5e-6, 2e-6]  # fig 4
    Qi = [2e-3, 2e-3, 1.8e-3, 2e-3, 1.5e-3, 1e-3, 5e-4, 2e-4]  # fig 5
    freq = np.array(freq)
    g0 = np.array(g0)
    b = np.array(Qi) * (2 * np.pi * np.array(freq))
    M0 = {'20010623': 5.4e14, '20020722': 4.1e15, '20030222': 1.5e16,
          '20030322': 8.8e14, '20041205': 6.8e15}  # table 1
    kwargs = {
        'plot_energies': plot,
        'plot_optimization': plot,
        'plot_fits': plot,
        'plot_eventresult': plot,
        'plot_eventsites': plot,
        'plot_results': plot,
        'plot_sites': plot,
        'plot_sds': plot,
        'plot_mags': plot
    }
    # Keep only the reference values with 0.3 < freq < 10.
    ind = np.logical_and(freq > 0.3, freq < 10)
    freq = freq[ind]
    g0 = np.array(g0)[ind]
    b = np.array(b)[ind]
    with tempdir(delete=not plot):
        run(create_config='conf.json', tutorial=True)
        result = run(conf='conf.json', **kwargs)
        if plot:
            plot_comparison(result['freq'], freq, result['g0'], g0,
                            result['b'], b)
    # Key the computed M0 values by the part of the event id before '_'.
    M0_qopen = {evid.split('_')[0]: r.get('M0')
                for evid, r in result['events'].items()}
    temp = [(M0_qopen[evid], M0[evid]) for evid in sorted(M0)]
    M0_qopen, M0 = zip(*temp)
    M0 = np.array(M0) / 2 ** 0.5  # wrong surface correction in paper
    # There seems to be a wrong factor of around 1e4 in the observed
    # envelopes (fig. 3). The error could be in the paper or the script.
    # Therefore M0 should be different by a factor of 1e2,
    # but actually they differ by a factor of 10. This corresponds to
    # a magnitude difference of 0.67. The magnitude comparison mags.pdf
    # suggests that the M0s determined by the script are ok.
    M0 = 10 * M0
    # Debug output, kept disabled:
    # np.set_printoptions(formatter={'all':lambda x: '%.2g' % x})
    # print('g0 test vs paper')
    # print(np.array(result['g0']))
    # print(g0)
    # print('b test vs paper')
    # print(np.array(result['b']))
    # print(b)
    # print('M0 test vs paper')
    # print(np.array(M0_qopen))
    # print(M0)
    # plot_comparison(result['freq'], freq, result['g0'], g0, result['b'],b)
    np.testing.assert_equal(result['freq'], freq)
    # print(np.log10(result['g0'] / g0))
    # print(np.log10(result['b'] / b))
    # print(np.log10(M0_qopen / M0))
    np.testing.assert_array_less(np.abs(np.log10(result['g0'] / g0)), 0.5)
    np.testing.assert_array_less(np.abs(np.log10(result['b'] / b)), 0.5)
    np.testing.assert_array_less(np.abs(np.log10(M0_qopen / M0)), 0.51)
def ParseFiles(self, knowledge_base, pathspecs, filedescs):
    """Map each pathspec's path to its file object and parse that fileset.

    Returns whatever ``self.ParseFileset`` returns for the mapping.
    """
    del knowledge_base  # Unused.

    fileset = {}
    for spec, file_obj in zip(pathspecs, filedescs):
        fileset[spec.path] = file_obj
    return self.ParseFileset(fileset)
def _Combine(self, pathspecs, file_objects):
    """Return a dict mapping each pathspec's path to its file's contents.

    Each file object is rewound to the start and then read with
    ``utils.ReadFileBytesAsUnicode``.
    """
    combined = {}
    for spec, fobj in zip(pathspecs, file_objects):
        key = spec.path
        fobj.seek(0)
        combined[key] = utils.ReadFileBytesAsUnicode(fobj)
    return combined
def test_transpose(self):
    """Test retrieving transpose of dissimilarity matrix."""
    pairs = zip(self.dms, self.dm_transposes)
    for original, expected in pairs:
        self.assertEqual(original.T, expected)
        self.assertEqual(original.transpose(), expected)

        # A different object must come back, even if the transpose is
        # the same as the original.
        self.assertTrue(original.transpose() is not original)
def test_data(self):
    """Test retrieving/setting data matrix."""
    pairs = zip(self.dms, self.dm_redundant_forms)
    for current, expected in pairs:
        observed = current.data
        self.assertTrue(np.array_equal(observed, expected))

    # Setting the data matrix must be rejected.
    with self.assertRaises(AttributeError):
        self.dm_3x3.data = 'foo'
def test_cosine_similarity(self):
    """``metrics.cosine_similarity`` matches a plain-Python computation."""
    # ``np.random.random_integers`` is deprecated; ``randint`` excludes its
    # upper bound, so 101 keeps the same inclusive 1..100 range.
    a = np.random.randint(1, 101, 100)
    b = np.random.randint(1, 101, 100)

    dot = sum(x * y for x, y in zip(a, b))
    norms = (math.sqrt(sum(x * x for x in a)) *
             math.sqrt(sum(x * x for x in b)))
    cs = dot / norms

    np.testing.assert_almost_equal(metrics.cosine_similarity(a, b), cs)
def test_multiset_many_zeros(self):
    """``metrics.multiset`` on vectors drawn from 0..2 matches a
    plain-Python computation."""
    # ``np.random.random_integers`` is deprecated; ``randint`` excludes its
    # upper bound, so 3 keeps the same inclusive 0..2 range.
    a = np.random.randint(0, 3, 100)
    b = np.random.randint(0, 3, 100)

    pairwise = metrics.pairwise['prod']
    # Positions where both entries are zero are left out.
    values = [pairwise(i, j) for i, j in zip(a, b) if i or j]

    np.testing.assert_almost_equal(metrics.multiset(a, b, pairwise),
                                   sum(values) / (len(values) + 1))
def wrapped_reader(file, encoding=self._encoding, newline=self._newline,
                   **kwargs):
    """Open all files, hand them to ``reader_function`` and yield its items.

    The handles for the file-valued keyword arguments replace those
    arguments in ``kwargs``; the last opened handle is passed as the
    primary file. Files stay open while items are being yielded.
    """
    file_keys, files, io_kwargs = self._setup_locals(
        file_params, file, encoding, newline, kwargs)

    with open_files(files, mode='r', **io_kwargs) as handles:
        primary = handles[-1]
        for key, handle in zip(file_keys, handles[:-1]):
            kwargs[key] = handle

        for record in reader_function(primary, **kwargs):
            yield record
def zip_t(zip_sequence):
    """
    Transformation for Sequence.zip

    :param zip_sequence: sequence to zip with
    :return: transformation
    """
    def zip_with(sequence):
        return zip(sequence, zip_sequence)

    return Transformation('zip(<sequence>)', zip_with, None)
def calpoints(self):
    """The calibration points, in (ch, kev) pairs.

    Pairs ``self.channels`` with ``self.energies`` position by position.

    Returns:
      a list of 2-element tuples of (channel, energy[keV])
    """
    return [(ch, kev) for ch, kev in zip(self.channels, self.energies)]
def test_fasta_to_pairlist(self):
    """Correctly returns a list of (seq, label)"""
    expected = [("AC", "sid_0"), ("ACAGTC", "sid_1"), ("ACTA", "sid_2"),
                ("CAGT", "sid_3"), ("CATGAA", "sid_4"), ("A", "sid_5"),
                ("CATGTA", "sid_6"), ("CAA", "sid_7"), ("CACCA", "sid_8")]
    observed = list(fasta_to_pairlist(self.seqs))

    # zip() stops at the shorter input, so without this check a result
    # with missing records would pass unnoticed.
    self.assertEqual(len(observed), len(expected))
    for obs_pair, exp_pair in zip(observed, expected):
        self.assertEqual(obs_pair, exp_pair)
def test_empty_str_replacement(self):
    """Records are formatted as expected with empty replacement strings."""
    expected = [
        ('', 'ACGT', range(4)),
        ('foobar', 'GAU', None),
        (' foo bar', 'TAG', None),
        ('foo bar baz', 'A', [42]),
    ]
    observed = list(_format_fasta_like_records(self.gen, '', '', False))

    self.assertEqual(len(observed), len(expected))
    for obs_record, exp_record in zip(observed, expected):
        npt.assert_equal(obs_record, exp_record)
def zip_with_index_t(start):
    """
    Transformation for Sequence.zip_with_index

    :param start: first index value
    :return: transformation
    """
    def zip_with_index(sequence):
        return zip(sequence, count(start=start))

    return Transformation('zip_with_index', zip_with_index, None)
def __init__(self, *args, **kwargs):
    """
    Handles adding a variation to the cart or wishlist.

    When adding from the product page, the product is provided
    from the view and a set of choice fields for all the
    product options for this product's variations are added to
    the form. When the form is validated, the selected options
    are used to determine the chosen variation.

    A ``to_cart`` boolean keyword arg is also given specifying
    whether the product is being added to a cart or wishlist.
    If a product is being added to the cart, then its stock
    level is also validated.

    When adding to the cart from the wishlist page, a sku is
    given for the variation, so the creation of choice fields
    is skipped.
    """
    self._product = kwargs.pop("product", None)
    self._to_cart = kwargs.pop("to_cart")
    super(AddProductForm, self).__init__(*args, **kwargs)

    # The submitted data is the first positional arg when one is given;
    # fall back to the ``data`` keyword so that a form built without
    # positional args does not raise IndexError.
    data = args[0] if args else kwargs.get("data")

    # Adding from the wishlist with a sku, bail out.
    if data is not None and data.get("sku", None):
        return

    # Adding from the product page, remove the sku field
    # and build the choice fields for the variations.
    del self.fields["sku"]
    option_fields = ProductVariation.option_fields()
    if not option_fields:
        return

    option_names, option_labels = list(zip(*[(f.name, f.verbose_name)
                                             for f in option_fields]))
    # Transpose the rows so that option_values[i] holds every value of
    # option_names[i] across the priced variations.
    option_values = list(zip(*self._product.variations.filter(
        unit_price__isnull=False).values_list(*option_names)))
    if option_values:
        for i, name in enumerate(option_names):
            values = [_f for _f in set(option_values[i]) if _f]
            if values:
                field = forms.ChoiceField(label=option_labels[i],
                                          choices=make_choices(values),
                                          widget=forms.RadioSelect())
                self.fields[name] = field
def _qseq_sniffer(fh):
    """Report whether the first lines of ``fh`` parse as QSeq records.

    At most ten lines are parsed. Returns ``(True, {})`` when at least one
    line was read and none raised QSeqFormatError, otherwise
    ``(False, {})``.
    """
    parsed_any = False
    try:
        # zip against range(10) caps the number of lines taken from fh.
        for _, line in zip(range(10), fh):
            _record_parser(line)
            parsed_any = True
    except QSeqFormatError:
        return False, {}
    return parsed_any, {}
def test_count_by_record(self):
    """Counting by record stores one profile per FASTA record."""
    expected_counts = [utils.counts(record, 8)
                       for record in utils.SEQUENCES]
    names = [str(i) for i in range(len(expected_counts))]
    filename = self.empty()

    fasta_path = self.fasta(utils.SEQUENCES, names=names)
    with open(fasta_path) as fasta_handle:
        with utils.open_profile(filename, 'w') as profile_handle:
            kmer.count([fasta_handle], profile_handle, 8, by_record=True)

    for name, counts in zip(names, expected_counts):
        utils.test_profile_file(filename, counts, 8, name=name)
def ParseFiles(self, knowledge_base, pathspecs, filedescs):
    """Yield one PCIDevice per PCI location found among the given files.

    Each file gives only partial information for a particular PCI device,
    so all files are first grouped by device location and the protos are
    built afterwards.
    """
    del knowledge_base  # Unused.

    # The grouped data looks like this:
    # data = { '0000:7f:0d.0': { 'class': '0x088000',
    #                            'vendor': '0x8086',
    #                            'device': '0x0ee1' } }
    # The key is the location of the PCI device in extended B/D/F notation
    # ("domain:bus:device.function"); the value maps each file name
    # returned for that device to the file's data.
    hc = r"[0-9A-Fa-f]"
    bdf_regex = re.compile(r"^%s+:%s+:%s+\.%s+" % ((hc,) * 4))

    # A missing location gets an empty dict on first access.
    data = collections.defaultdict(dict)

    for spec, fobj in zip(pathspecs, filedescs):
        filename = spec.Basename()
        # The device location is the name of the file's parent directory.
        bdf = spec.Dirname().Basename()

        # Skip files that are not under a valid B/D/F folder.
        if not bdf_regex.match(bdf):
            continue

        # "config" holds raw data and is kept untouched; every other file
        # has its trailing newlines removed.
        contents = fobj.read()
        if filename != "config":
            contents = contents.rstrip(b"\n")
        data[bdf][filename] = contents

    # Convert the grouped data into PCIDevice protos.
    for bdf, bdf_filedata in iteritems(data):
        location = bdf.split(":")
        device_function = location[2].split(".")

        pci_device = rdf_client.PCIDevice()
        # The hex location parts are stored as integers.
        pci_device.domain = int(location[0], 16)
        pci_device.bus = int(location[1], 16)
        pci_device.device = int(device_function[0], 16)
        pci_device.function = int(device_function[1], 16)

        pci_device.class_id = bdf_filedata.get("class")
        pci_device.vendor = bdf_filedata.get("vendor")
        pci_device.vendor_device_id = bdf_filedata.get("device")
        pci_device.config = bdf_filedata.get("config")

        yield pci_device
def wrapped_reader(file, encoding=self._encoding, newline=self._newline,
                   **kwargs):
    """Open all files and return the result of ``reader_function``.

    The handles for the file-valued keyword arguments replace those
    arguments in ``kwargs`` before the call.
    """
    file_keys, files, io_kwargs = self._setup_locals(
        file_params, file, encoding, newline, kwargs)

    with open_files(files, mode='r', **io_kwargs) as handles:
        # The primary file is the last handle because append
        # is cheaper than insert.
        for key, handle in zip(file_keys, handles[:-1]):
            kwargs[key] = handle
        return reader_function(handles[-1], **kwargs)
def model(self):
    """
    The polygon model drawn as :class:`fatiando.mesher.Polygon` objects.

    One ``Polygon`` is built per entry of ``self.polygons`` from its ``xy``
    attribute, with the matching entry of ``self.densities`` as its
    ``'density'`` property.
    """
    polygons = []
    for patch, density in zip(self.polygons, self.densities):
        polygons.append(Polygon(patch.xy, {'density': density}))
    return polygons
def merge(input_handle_left, input_handle_right, output_handle,
          names_left=None, names_right=None, merger='sum',
          custom_merger=None):
    """
    Merge k-mer profiles.

    Profiles from the left and right files are paired by position in the
    given name lists (or in the sorted profile names when no list is
    given) and merged pairwise. The resulting profile name is set to that
    of the original profiles if they match, or to their concatenation
    (joined by an underscore) otherwise.

    :arg h5py.File input_handle_left, input_handle_right: Open readable
      k-mer profile file handle.
    :arg h5py.File output_handle: Open writeable k-mer profile file handle.
    :arg list(str) names_left, names_right: Optional list of names of the
      k-mer profiles to consider. If not provided, all profiles in the
      file are considered.
    :arg str merger: Name of the merge function.
    :arg str custom_merger: Custom merge function.

    :raises ValueError: If the name lists differ in length, or if two
      paired profiles differ in length.
    """
    left_names = names_left or sorted(input_handle_left['profiles'])
    right_names = names_right or sorted(input_handle_right['profiles'])
    if len(left_names) != len(right_names):
        raise ValueError(PAIRED_NAMES_COUNT_ERROR)

    if not custom_merger:
        merge_function = metrics.mergers[merger]
    elif re.match(_PYTHON_IMPORTABLE, custom_merger):
        # Importable definition, e.g. `package.module.merge_function`.
        module, name = custom_merger.rsplit('.', 1)
        merge_function = getattr(importlib.import_module(module), name)
    else:
        # Expression over `left` and `right`, e.g. `np.add(left, right)`.
        # The `numpy` package is available as `np`.
        # NOTE(review): this evaluates caller-supplied text with `eval`;
        # confirm `custom_merger` never comes from an untrusted source.
        merge_function = eval('lambda left, right: ' + custom_merger,
                              {'np': np})

    for left_name, right_name in zip(left_names, right_names):
        left = klib.Profile.from_file(input_handle_left, name=left_name)
        right = klib.Profile.from_file(input_handle_right, name=right_name)

        if left.length != right.length:
            raise ValueError(LENGTH_ERROR)

        # The right profile receives the merge result and is saved.
        right.merge(left, merge_function)

        if left_name == right_name:
            merged_name = left_name
        else:
            merged_name = left_name + '_' + right_name
        right.save(output_handle, name=merged_name)
def test_nacl(self):
    """Keys derived from one secret with different salts must differ."""
    secret = 'No, I am your father'.encode()
    peppered = hkdf(secret, 5, salt=b'and pepper')
    vinegared = hkdf(secret, 5, salt=b'and vinegar')

    message = 'using different salts should result in different keys'
    for pepper_key, vinegar_key in zip(peppered, vinegared):
        self.assertNotEqual(pepper_key, vinegar_key, msg=message)
def _add_bonds(self):
    """Add bonds between consecutive "calpha" atoms of every segment.

    For each segment, the indices of its ``"calpha"`` selection are paired
    with their successors; the pairs are stored as a ``Bonds`` topology
    attribute and ``self._generate_from_topology()`` is then called.
    """
    bonds = []
    for segment in self.segments:
        # Evaluate the selection once per segment instead of twice.
        calpha_ix = segment.atoms.select_atoms("calpha").ix
        bonds.extend(zip(calpha_ix, calpha_ix[1:]))

    self._topology.add_TopologyAttr(topologyattrs.Bonds(bonds))
    self._generate_from_topology()
def _add_to_cart(self, variation, quantity):
    """
    Build the cart form data for the given variation and post it to the
    variation's product URL.

    The data maps each option field name to the variation's matching
    option value, plus the ``quantity``.
    """
    field_names = [field.name for field in ProductVariation.option_fields()]

    data = {}
    for name, option in zip(field_names, variation.options()):
        data[name] = option
    data["quantity"] = quantity

    self.client.post(variation.product.get_absolute_url(), data)
def test_multi_char_replacement(self):
    """Records are formatted as expected with multi-character
    replacement strings."""
    expected = [
        ('', 'ACGT', range(4)),
        ('-.--.-foo-.--.--.--.-bar-.-', 'GAU', None),
        (' foo_-__-_ bar_-_', 'TAG', None),
        ('foo bar baz', 'A', [42]),
    ]
    observed = list(_format_fasta_like_records(self.gen, '-.-', '_-_',
                                               False))

    self.assertEqual(len(observed), len(expected))
    for obs_record, exp_record in zip(observed, expected):
        npt.assert_equal(obs_record, exp_record)
def address_pairs(fields):
    """
    Groups address fields into 2-tuples of neighbouring items. If the
    total is an odd number, the last field is appended on its own (not
    wrapped in a tuple).
    """
    total = len(fields)
    pairs = [(fields[i], fields[i + 1]) for i in range(0, total - 1, 2)]
    if total % 2:
        pairs.append(fields[-1])
    return pairs