Example #1
            def wrapped_reader(fp, mode='U', mutate_fh=False, **kwargs):
                if not _is_iter_list(fp):
                    fp = [fp]

                with open_files(fp, mode) as fhs:
                    generator = reader(*fhs, **kwargs)

                    if mutate_fh or (not is_compound and
                                     _is_string_or_bytes(fp[0])):
                        # `yield from` re-yields the underlying generator
                        # without leaking StopIteration (a RuntimeError
                        # under PEP 479 / Python 3.7+).
                        yield from generator

                    else:
                        # Juggle two sets of file positions so reads from the
                        # generator can be interleaved with the caller's own
                        # seeks: `orig_positions` is where the caller left
                        # each handle, `read_positions` is where our last
                        # read stopped.
                        orig_positions = [fh.tell() for fh in fhs]
                        read_positions = orig_positions
                        try:
                            while True:
                                orig_positions = [fh.tell() for fh in fhs]

                                for fh, pos in zip(fhs, read_positions):
                                    fh.seek(pos)
                                try:
                                    next_result = next(generator)
                                except StopIteration:
                                    # Don't leak StopIteration out of this
                                    # generator (RuntimeError under PEP 479).
                                    break
                                read_positions = [fh.tell() for fh in fhs]

                                for fh, pos in zip(fhs, orig_positions):
                                    fh.seek(pos)

                                yield next_result
                        finally:
                            for fh, pos in zip(fhs, orig_positions):
                                fh.seek(pos)
Example #2
  def testFilterConsidersOffsetAndCount(self):
    client_id = self.client_ids[0]

    # Create ten approval requests without granting them.
    for i in range(10):
      with test_lib.FakeTime(42 + i):
        self.RequestClientApproval(
            client_id.Basename(), reason="Request reason %d" % i)

    args = user_plugin.ApiListClientApprovalsArgs(
        client_id=client_id, offset=0, count=5)
    result = self.handler.Handle(args, token=self.token)

    # Approvals are returned newest to oldest, so the first five approvals
    # have reason 9 to 5.
    self.assertLen(result.items, 5)
    for item, i in zip(result.items, reversed(range(5, 10))):
      self.assertEqual(item.reason, "Request reason %d" % i)

    # When no count is specified, take all items from offset to the end.
    args = user_plugin.ApiListClientApprovalsArgs(client_id=client_id, offset=7)
    result = self.handler.Handle(args, token=self.token)

    self.assertLen(result.items, 3)
    for item, i in zip(result.items, reversed(range(0, 3))):
      self.assertEqual(item.reason, "Request reason %d" % i)
Example #3
def test_retrieve_contiguous(mock_jagged_raw, columns, contiguity):

    originals, ne, nc, dtype, segments, reader, rng = mock_jagged_raw

    if columns is not None:
        originals = [o[:, tuple(columns)] for o in originals]

    # sanity checks for wrong inputs
    with pytest.raises(ValueError) as excinfo:
        retrieve_contiguous(segments, columns, reader, dtype, ne, nc, 'wrong')
    assert 'Unknown contiguity scheme:' in str(excinfo.value)

    with pytest.raises(ValueError) as excinfo:
        retrieve_contiguous([(-1, 1)], columns, reader, dtype, ne, nc, contiguity)
    assert 'Out of bounds query (base=-1, size=1' in str(excinfo.value)

    with pytest.raises(ValueError) as excinfo:
        retrieve_contiguous([(0, 100000)], columns, reader, dtype, ne, nc, contiguity)
    assert 'Out of bounds query (base=0, size=100000' in str(excinfo.value)

    # insertion order
    views = retrieve_contiguous(segments, columns, reader, dtype, ne, nc, contiguity)
    for o, v in zip(originals, views):
        assert np.allclose(o, v)

    # random order
    o_s = list(zip(originals, segments))
    rng.shuffle(o_s)
    originals, segments = zip(*o_s)
    views = retrieve_contiguous(segments, columns, reader, dtype, ne, nc, contiguity)
    for o, v in zip(originals, views):
        assert np.allclose(o, v)
Example #4
def _fasta_sniffer(fh):
    # Strategy:
    #   Ignore up to 5 blank/whitespace-only lines at the beginning of the
    #   file. Read up to 10 FASTA records. If at least one record is read (i.e.
    #   the file isn't empty) and no errors are thrown during reading, assume
    #   the file is in FASTA format. Next, try to parse the file as QUAL, which
    #   has stricter requirements. If this succeeds, do *not* identify the file
    #   as FASTA since we don't want to sniff QUAL files as FASTA (technically
    #   they can be read as FASTA since the sequences aren't validated but it
    #   probably isn't what the user wanted). Also, if we add QUAL as its own
    #   file format in the future, we wouldn't want the FASTA and QUAL sniffers
    #   to both identify a QUAL file.
    if _too_many_blanks(fh, 5):
        return False, {}

    num_records = 10
    try:
        not_empty = False
        for _ in zip(range(num_records), _fasta_to_generator(fh)):
            not_empty = True

        if not_empty:
            fh.seek(0)
            try:
                list(zip(range(num_records),
                         _parse_fasta_raw(fh, _parse_quality_scores, 'QUAL')))
            except FASTAFormatError:
                return True, {}
            else:
                return False, {}
        else:
            return False, {}
    except FASTAFormatError:
        return False, {}
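A note on the zip(range(num_records), ...) pattern above: pairing a generator
with a bounded range caps how many records are drawn without materializing the
whole file. A minimal standalone sketch of the idiom (the naturals generator
is illustrative, not part of the sniffer):

import itertools

def naturals():
    n = 0
    while True:
        yield n
        n += 1

# zip stops once range(3) is exhausted, so only three items are consumed.
assert [v for _, v in zip(range(3), naturals())] == [0, 1, 2]

# itertools.islice is an equivalent standard-library spelling.
assert list(itertools.islice(naturals(), 3)) == [0, 1, 2]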
Example #5
    def test_fasta_to_sequence_collection_and_alignment(self):
        test_cases = (self.empty, self.single,
                      self.sequence_collection_different_type)

        for constructor, reader_fn in ((SequenceCollection,
                                        _fasta_to_sequence_collection),
                                       (Alignment,
                                        _fasta_to_alignment)):
            # see comment in test_fasta_to_generator_valid_files (above) for
            # testing strategy
            for exp_list, kwargs, fasta_fps, qual_fps in test_cases:
                exp = constructor(exp_list)

                for fasta_fp in fasta_fps:
                    obs = reader_fn(fasta_fp, **kwargs)

                    # TODO remove this custom equality testing code when
                    # SequenceCollection has an equals method (part of #656).
                    # We need this method to include IDs and description in the
                    # comparison (not part of SequenceCollection.__eq__).
                    self.assertEqual(len(obs), len(exp))
                    for o, e in zip(obs, exp):
                        self.assertTrue(o.equals(e, ignore=['quality']))

                    for qual_fp in qual_fps:
                        obs = reader_fn(fasta_fp, qual=qual_fp, **kwargs)

                        # TODO remove this custom equality testing code when
                        # SequenceCollection has an equals method (part of
                        # #656). We need this method to include IDs and
                        # description in the comparison (not part of
                        # SequenceCollection.__eq__).
                        self.assertEqual(obs, exp)
                        for o, e in zip(obs, exp):
                            self.assertTrue(o.equals(e))
Example #6
    def __init__(self):
        self.quads_index = []

        with open(self.names_file, 'r') as fh:
            self.names = [name.strip() for name in fh]

        with open(self.quadsindex_file, 'r') as fh:
            indexes = []
            for index in fh:
                indexes += [n.strip() for n in index.split(' ') if n != '']

        self.lons_per_lat = dict(list(zip(
            self.quads_order,
            [indexes[x:x + 91] for x in range(0, len(indexes), 91)]
        )))

        self.lat_begins = {}

        for quad, index in list(self.lons_per_lat.items()):
            begin = 0
            end = -1
            begins = []

            for item in index:
                begin = end + 1
                begins.append(begin)
                end += int(item)

            self.lat_begins[quad] = begins

        self.lons = {}
        self.fenums = {}
        for quad, sect_file in zip(self.quads_order, self.sect_files):
            sect = []
            with open(sect_file, 'r') as fh:
                for line in fh:
                    sect += [int(v) for v in line.strip().split(' ')
                             if v != '']

            lons = []
            fenums = []
            n = 0
            for item in sect:
                n += 1
                if n % 2:
                    lons.append(item)
                else:
                    fenums.append(item)

            self.lons[quad] = lons
            self.fenums[quad] = fenums

        with open(self.numbers_file, 'rt') as csvfile:
            FE_csv = csv.reader(csvfile, delimiter=native_str(';'),
                                quotechar=native_str('#'),
                                skipinitialspace=True)
            self.by_number = \
                dict((int(row[0]), row[1]) for row in FE_csv if len(row) > 1)
Example #7
    def test_fasta_to_generator_valid_files(self):
        test_cases = (self.empty, self.single, self.multi,
                      self.odd_labels_different_type,
                      self.sequence_collection_different_type,
                      self.lowercase_seqs)

        # Strategy:
        #   for each fasta file, read it without its corresponding qual file,
        #   and ensure observed vs. expected match, ignoring quality scores in
        #   expected. next, parse the current fasta file with each
        #   corresponding quality file and ensure that observed vs. expected
        #   match, this time taking quality scores into account. this
        #   sufficiently exercises parsing a standalone fasta file and paired
        #   fasta/qual files
        for exp, kwargs, fasta_fps, qual_fps in test_cases:
            for fasta_fp in fasta_fps:
                obs = list(_fasta_to_generator(fasta_fp, **kwargs))
                self.assertEqual(len(obs), len(exp))
                for o, e in zip(obs, exp):
                    e = e.copy()
                    del e.positional_metadata['quality']
                    self.assertEqual(o, e)

                for qual_fp in qual_fps:
                    obs = list(_fasta_to_generator(fasta_fp, qual=qual_fp,
                                                   **kwargs))

                    self.assertEqual(len(obs), len(exp))
                    for o, e in zip(obs, exp):
                        self.assertEqual(o, e)
Example #8
def _compute_score_and_traceback_matrices(
        seq1, seq2, gap_open_penalty, gap_extend_penalty, substitution_matrix,
        new_alignment_score=-np.inf, init_matrices_f=_init_matrices_nw):
    """Return dynamic programming (score) and traceback matrices
    """
    # cache some values for quicker/simpler access
    aend = _traceback_encoding['alignment-end']
    match = _traceback_encoding['match']
    vgap = _traceback_encoding['vertical-gap']
    hgap = _traceback_encoding['horizontal-gap']

    new_alignment_score = (new_alignment_score, aend)

    # Initialize a matrix to use for scoring the alignment and for tracing
    # back the best alignment
    score_matrix, traceback_matrix = init_matrices_f(
        seq1, seq2, gap_open_penalty, gap_extend_penalty)

    # Iterate over the characters in sequence two (which will correspond
    # to the vertical sequence in the matrix)
    # Note that i corresponds to row numbers, as in 'Biological Sequence
    # Analysis'
    for i, c2 in zip(range(1, len(seq2)+1), seq2):
        # Iterate over the characters in sequence one (which will
        # correspond to the horizontal sequence in the matrix)
        # Note that j corresponds to column numbers, as in 'Biological
        # Sequence Analysis'
        for j, c1 in zip(range(1, len(seq1)+1), seq1):
            try:
                substitution_score = substitution_matrix[c1][c2]
            except KeyError:
                offending_chars = \
                    [c for c in (c1, c2) if c not in substitution_matrix]
                raise ValueError(
                    "One of the sequences contains a character that is not "
                    "contained in the substitution matrix. Are you using "
                    "an appropriate substitution matrix for your sequence "
                    "type (e.g., a nucleotide substitution matrix does not "
                    "make sense for aligning protein sequences)? Does your "
                    "sequence contain invalid characters? The offending "
                    "character(s) is: %s." % ', '.join(offending_chars))
            diag_score = (score_matrix[i-1, j-1] + substitution_score, match)
            if traceback_matrix[i-1, j] == vgap:
                # gap extend, because the cell above was also a gap
                up_score = (score_matrix[i-1, j] - gap_extend_penalty, vgap)
            else:
                # gap open, because the cell above was not a gap
                up_score = (score_matrix[i-1, j] - gap_open_penalty, vgap)
            if traceback_matrix[i, j-1] == hgap:
                # gap extend, because the cell to the left was also a gap
                left_score = (score_matrix[i, j-1] - gap_extend_penalty, hgap)
            else:
                # gap open, because the cell to the left was not a gap
                left_score = (score_matrix[i, j-1] - gap_open_penalty, hgap)
            best_score = _first_largest([new_alignment_score, left_score,
                                         diag_score, up_score])
            score_matrix[i, j] = best_score[0]
            traceback_matrix[i, j] = best_score[1]
    return score_matrix, traceback_matrix
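The zip(range(1, len(seq) + 1), seq) loops above pair each residue with its
1-based matrix index; enumerate with a start argument is an equivalent
spelling. A minimal sketch of the equivalence (the sample sequence is
illustrative):

seq = "ACGT"
via_zip = list(zip(range(1, len(seq) + 1), seq))
via_enumerate = list(enumerate(seq, start=1))
assert via_zip == via_enumerate == [(1, 'A'), (2, 'C'), (3, 'G'), (4, 'T')]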
Example #9
    def _common_creation_steps(cls, md_template, obj_id):
        r"""Executes the common creation steps

        Parameters
        ----------
        md_template : DataFrame
            The metadata template file contents indexed by sample ids
        obj_id : int
            The id of the object being created
        """
        with TRN:
            cls._check_subclass()

            # Get some useful information from the metadata template
            sample_ids = md_template.index.tolist()
            headers = sorted(md_template.keys().tolist())

            # Insert values on template_sample table
            values = [[obj_id, s_id] for s_id in sample_ids]
            sql = """INSERT INTO qiita.{0} ({1}, sample_id)
                     VALUES (%s, %s)""".format(cls._table, cls._id_column)
            TRN.add(sql, values, many=True)

            # Insert rows on *_columns table
            datatypes = get_datatypes(md_template.loc[:, headers])
            # psycopg2 requires a list of tuples, in which each tuple is a set
            # of values to use in the string formatting of the query. We have
            # all the values in different lists (but in the same order) so use
            # zip to create the list of tuples that psycopg2 requires.
            values = [[obj_id, h, d] for h, d in zip(headers, datatypes)]
            sql = """INSERT INTO qiita.{0} ({1}, column_name, column_type)
                     VALUES (%s, %s, %s)""".format(cls._column_table,
                                                   cls._id_column)
            TRN.add(sql, values, many=True)

            # Create table with custom columns
            table_name = cls._table_name(obj_id)
            column_datatype = ["%s %s" % (col, dtype)
                               for col, dtype in zip(headers, datatypes)]
            sql = """CREATE TABLE qiita.{0} (
                        sample_id varchar NOT NULL, {1},
                        CONSTRAINT fk_{0} FOREIGN KEY (sample_id)
                            REFERENCES qiita.study_sample (sample_id)
                            ON UPDATE CASCADE
                     )""".format(table_name, ', '.join(column_datatype))
            TRN.add(sql)

            # Insert values on custom table
            values = as_python_types(md_template, headers)
            values.insert(0, sample_ids)
            values = [list(v) for v in zip(*values)]
            sql = """INSERT INTO qiita.{0} (sample_id, {1})
                     VALUES (%s, {2})""".format(
                table_name, ", ".join(headers),
                ', '.join(["%s"] * len(headers)))
            TRN.add(sql, values, many=True)

            # Execute all the steps
            TRN.execute()
Example #10
def _traceback(traceback_matrix, score_matrix, aln1, aln2, start_row, start_col):
    # cache some values for simpler reference
    aend = _traceback_encoding["alignment-end"]
    match = _traceback_encoding["match"]
    vgap = _traceback_encoding["vertical-gap"]
    hgap = _traceback_encoding["horizontal-gap"]
    gap_character = aln1.dtype.default_gap_char

    # initialize the result alignments
    aln1_sequence_count = aln1.shape.sequence
    aligned_seqs1 = [[] for _ in range(aln1_sequence_count)]

    aln2_sequence_count = aln2.shape.sequence
    aligned_seqs2 = [[] for _ in range(aln2_sequence_count)]

    current_row = start_row
    current_col = start_col

    best_score = score_matrix[current_row, current_col]
    current_value = None

    while current_value != aend:
        current_value = traceback_matrix[current_row, current_col]

        if current_value == match:
            for aligned_seq, input_seq in zip(aligned_seqs1, aln1):
                aligned_seq.append(str(input_seq[current_col - 1]))
            for aligned_seq, input_seq in zip(aligned_seqs2, aln2):
                aligned_seq.append(str(input_seq[current_row - 1]))
            current_row -= 1
            current_col -= 1
        elif current_value == vgap:
            for aligned_seq in aligned_seqs1:
                aligned_seq.append(gap_character)
            for aligned_seq, input_seq in zip(aligned_seqs2, aln2):
                aligned_seq.append(str(input_seq[current_row - 1]))
            current_row -= 1
        elif current_value == hgap:
            for aligned_seq, input_seq in zip(aligned_seqs1, aln1):
                aligned_seq.append(str(input_seq[current_col - 1]))
            for aligned_seq in aligned_seqs2:
                aligned_seq.append(gap_character)
            current_col -= 1
        elif current_value == aend:
            continue
        else:
            raise ValueError("Invalid value in traceback matrix: %s" % current_value)

    for i in range(aln1_sequence_count):
        aligned_seq = "".join(aligned_seqs1[i][::-1])
        constructor = aln1.dtype
        aligned_seqs1[i] = constructor(aligned_seq)

    for i in range(aln2_sequence_count):
        aligned_seq = "".join(aligned_seqs2[i][::-1])
        constructor = aln2.dtype
        aligned_seqs2[i] = constructor(aligned_seq)

    return aligned_seqs1, aligned_seqs2, best_score, current_col, current_row
Example #11
def _write_csv(input_files, results, csv_path=None):
    """
    Writes out results as a csv.

    :param input_files: List of filenames for each respective metadata.
    :param results: List of metadata dictionaries.
    :param csv_path: Path to write out csv file, defaults to stdout.

    :raises IOError: If csv could not be written out.
    """
    scan_date = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')

    # Add/Tweak metadata.
    for inputfilename, metadata in zip(input_files, results):
        # Add scan date.
        metadata[u'scan_date'] = scan_date
        if u'inputfilename' not in metadata:
            metadata[u'inputfilename'] = inputfilename

        # Flatten 'other' entry so nested values get their own columns,
        # are more readable, and easier to individually analyze.
        #
        # Example:
        #   {'other': {"unique_entry": "value", "unique_key": "value2"}}
        #   Results in columns: other, other.unique_entry, other.unique_key
        if u'other' in metadata:
            for sub_key, sub_value in metadata[u'other'].items():
                metadata[u'other.{}'.format(convert_to_unicode(sub_key))] = sub_value
            del metadata[u'other']

        # Split outputfile into multiple fields.
        if u'outputfile' in metadata:
            value = list(zip(*metadata[u'outputfile']))
            metadata[u'outputfile.name'] = value[0]
            metadata[u'outputfile.description'] = value[1]
            metadata[u'outputfile.md5'] = value[2]
            del metadata[u'outputfile']

    # Sort columns, but with _STD_CSV_COLUMNS showing up first.
    column_names = set(itertools.chain(*(metadata.keys() for metadata in results)))
    column_names = sorted(
        column_names,
        key=lambda x: (0, _STD_CSV_COLUMNS.index(x)) if x in _STD_CSV_COLUMNS
        else (1, x))

    # Reformat metadata and write to CSV
    if csv_path is None:
        csvfile = sys.stdout
    else:
        csvfile = open(csv_path, 'wb' if six.PY2 else 'w')

    try:
        dw = csv.DictWriter(csvfile, fieldnames=column_names, lineterminator='\n')
        dw.writeheader()
        for metadata in results:
            dw.writerow({k: _format_metadata_value(v) for k, v in metadata.items()})
    finally:
        if csv_path:
            csvfile.close()
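The list(zip(*metadata[u'outputfile'])) step above transposes a list of
per-file tuples into parallel columns (names, descriptions, md5s). A minimal
sketch of the unzip idiom, with hypothetical records:

# Hypothetical (name, description, md5) tuples, one per output file.
outputfile = [('a.bin', 'payload', 'd41d8cd9'),
              ('b.cfg', 'config', '9e107d9d')]

names, descriptions, md5s = zip(*outputfile)
assert names == ('a.bin', 'b.cfg')
assert md5s == ('d41d8cd9', '9e107d9d')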
Example #12
def hinton(inarray, max_value=None, use_default_ticks=True):
    """Plot Hinton diagram for visualizing the values of a 2D array.

    Plot representation of an array with positive and negative values
    represented by white and black squares, respectively. The size of each
    square represents the magnitude of each value.

    Unlike the hinton demo in the matplotlib gallery [1]_, this implementation
    uses a RegularPolyCollection to draw squares, which is much more efficient
    than drawing individual Rectangles.

    .. note::
        This function inverts the y-axis to match the origin for arrays.

    .. [1] http://matplotlib.sourceforge.net/examples/api/hinton_demo.html

    Parameters
    ----------
    inarray : array
        Array to plot.
    max_value : float
        Any *absolute* value larger than `max_value` will be represented by a
        unit square.
    use_default_ticks : bool
        If False, disable tick generation here so ticks can be configured
        outside this function.
    """

    ax = plt.gca()
    ax.set_facecolor('gray')
    # make sure we're working with a numpy array, not a numpy matrix
    inarray = np.asarray(inarray)
    height, width = inarray.shape
    if max_value is None:
        max_value = 2**np.ceil(np.log(np.max(np.abs(inarray)))/np.log(2))
    values = np.clip(inarray/max_value, -1, 1)
    rows, cols = np.mgrid[:height, :width]

    pos = np.where(values > 0)
    neg = np.where(values < 0)
    for idx, color in zip([pos, neg], ['white', 'black']):
        if len(idx[0]) > 0:
            xy = list(zip(cols[idx], rows[idx]))
            circle_areas = np.pi / 2 * np.abs(values[idx])
            squares = SquareCollection(sizes=circle_areas,
                                       offsets=xy, transOffset=ax.transData,
                                       facecolor=color, edgecolor=color)
            ax.add_collection(squares, autolim=True)

    ax.axis('scaled')
    # set data limits instead of using xlim, ylim.
    ax.set_xlim(-0.5, width-0.5)
    ax.set_ylim(height-0.5, -0.5)

    if use_default_ticks:
        ax.xaxis.set_major_locator(IndexLocator())
        ax.yaxis.set_major_locator(IndexLocator())
Example #13
    def test_getWaveformNSCL(self):
        """
        Tests getWaveformNSCL method.
        """
        client = Client(host="137.227.224.97", port=2061)
        # now - 5 hours
        t = UTCDateTime() - 5 * 60 * 60
        duration_long = 3600.0
        duration = 1.0
        components = ["1", "2", "Z"]
        # try one longer request to see if fetching multiple blocks works
        st = client.getWaveformNSCL("IUANMO BH.00", t, duration_long)
        # merge to avoid failing tests simply due to gaps
        st.merge()
        st.sort()
        self.assertTrue(len(st) == 3)
        for tr, component in zip(st, components):
            stats = tr.stats
            self.assertTrue(stats.station == "ANMO")
            self.assertTrue(stats.network == "IU")
            self.assertTrue(stats.location == "00")
            self.assertTrue(stats.channel == "BH" + component)
            self.assertTrue(stats.endtime - stats.starttime == duration_long)
            # if the following fails this is likely due to a change at the
            # requested station and simply has to be adapted
            self.assertTrue(stats.sampling_rate == 20.0)
            self.assertTrue(len(tr) == 72001)
        # now use shorter piece, this is faster and less error prone (gaps etc)
        st = client.getWaveformNSCL("IUANMO BH.00", t, duration)
        st.sort()
        # test returned stream
        self.assertTrue(len(st) == 3)
        for tr, component in zip(st, components):
            stats = tr.stats
            self.assertTrue(stats.station == "ANMO")
            self.assertTrue(stats.network == "IU")
            self.assertTrue(stats.location == "00")
            self.assertTrue(stats.channel == "BH" + component)
            self.assertTrue(stats.endtime - stats.starttime == duration)
            # if the following fails this is likely due to a change at the
            # requested station and simply has to be adapted
            self.assertTrue(stats.sampling_rate == 20.0)
            self.assertTrue(len(tr) == 21)

        # try a series of regex patterns that should return the same data
        st = client.getWaveformNSCL("IUANMO BH", t, duration)
        patterns = ["IUANMO BH...",
                    "IUANMO BH.*",
                    "IUANMO BH[Z12].*",
                    "IUANMO BH[Z12]..",
                    "IUANMO B.*",
                    "..ANMO B.*"]
        for pattern in patterns:
            st2 = client.getWaveformNSCL(pattern, t, duration)
            self.assertTrue(st == st2)
Example #14
 def _to_values(self, collection):
     """
     Regroup values in tuples or dicts for each "instance".
     Example: Given this result from redis: ['id1', 'name1', 'id2', 'name2']
      tuples: [('id1', 'name1'), ('id2', 'name2')]
      dicts:  [{'id': 'id1', 'name': 'name1'}, {'id': 'id2', 'name': 'name2'}]
     """
     result = zip(*([iter(collection)] * len(self._values['fields']['names'])))
     if self._values['mode'] == 'dicts':
         result = (dict(zip(self._values['fields']['names'], a_result)) for a_result in result)
     return result
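The zip(*([iter(collection)] * n)) expression above is the standard grouper
idiom: one iterator repeated n times, so each zip step draws n consecutive
items. A minimal sketch with flat sample data:

flat = ['id1', 'name1', 'id2', 'name2']
fields = ['id', 'name']

# One shared iterator referenced twice: zip pulls two items per tuple.
grouped = list(zip(*([iter(flat)] * len(fields))))
assert grouped == [('id1', 'name1'), ('id2', 'name2')]

as_dicts = [dict(zip(fields, row)) for row in grouped]
assert as_dicts == [{'id': 'id1', 'name': 'name1'},
                    {'id': 'id2', 'name': 'name2'}]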
Example #15
 def _button_press_callback(self, event):
     """
     What actions to perform when a mouse button is clicked
     """
     if event.inaxes != self.modelax:
         return
     if event.button == 1 and not self._drawing and self.polygons:
         self._lastevent = event
         for line, poly in zip(self.lines, self.polygons):
             poly.set_animated(False)
             line.set_animated(False)
             line.set_color([0, 0, 0, 0])
         self.canvas.draw()
         # Find out if the click happened on a vertex,
         # and if so, which vertex of which polygon
         self._ipoly, self._ivert = self._get_polygon_vertice_id(event)
         if self._ipoly is not None:
             self.density_slider.set_val(self.densities[self._ipoly])
             self.polygons[self._ipoly].set_animated(True)
             self.lines[self._ipoly].set_animated(True)
             self.lines[self._ipoly].set_color([0, 1, 0, 0])
             self.canvas.draw()
             self.background = self.canvas.copy_from_bbox(self.modelax.bbox)
             self.modelax.draw_artist(self.polygons[self._ipoly])
             self.modelax.draw_artist(self.lines[self._ipoly])
             self.canvas.blit(self.modelax.bbox)
     elif self._drawing:
         if event.button == 1:
             self._xy.append([event.xdata, event.ydata])
             self._drawing_plot.set_data(list(zip(*self._xy)))
             self.canvas.restore_region(self.background)
             self.modelax.draw_artist(self._drawing_plot)
             self.canvas.blit(self.modelax.bbox)
         elif event.button == 3:
             if len(self._xy) >= 3:
                 density = self.density_slider.val
                 poly, line = self._make_polygon(self._xy, density)
                 self.polygons.append(poly)
                 self.lines.append(line)
                 self.densities.append(density)
                 self.modelax.add_patch(poly)
                 self.modelax.add_line(line)
                 self._drawing_plot.remove()
                 self._drawing_plot = None
                 self._xy = None
                 self._drawing = False
                 self._ipoly = len(self.polygons) - 1
                 self.lines[self._ipoly].set_color([0, 1, 0, 0])
                 self.dataax.set_title(self.instructions)
                 self.canvas.draw()
                 self._update_data()
                 self._update_data_plot()
Example #16
def test_roundtrip(jagged_raw, dataset, columns):
    jagged_raw, path = jagged_raw
    jagged_raw = partial(jagged_raw, path=path)
    rng, originals, ncol = dataset

    # Write
    keys = []
    with jagged_raw() as jr:
        total = 0
        assert jr.dtype is None
        assert jr.shape is None
        for original in originals:
            key = jr.append(original)
            assert jr.is_writing
            keys.append(key)
            total += len(original)
            assert len(jr) == total
        assert jr.dtype == originals[0].dtype
        assert jr.shape == (total, ncol)

    # Read
    def test_read(originals, keys):

        if columns is not None:
            originals = [o[:, columns] for o in originals]

        # test read, one by one
        with jagged_raw() as jr:
            for original, key in zip(originals, keys):
                roundtripped = jr.get([key], columns=columns)[0]
                assert np.allclose(original, roundtripped)

        # test read, in a batch
        with jagged_raw() as jr:
            for original, roundtripped in zip(originals, jr.get(keys, columns=columns)):
                assert np.allclose(original, roundtripped)

    # read all
    # with jagged_raw() as jr:
    #     for original, roundtripped in zip(originals, jr.get(columns=columns)):
    #         original = original if columns is None else original[:, columns]
    #         assert np.allclose(original, roundtripped)

    # read in insertion order
    test_read(originals, keys)

    # read in random order
    or_s = list(zip(originals, keys))
    rng.shuffle(or_s)
    originals, keys = zip(*or_s)
    test_read(originals, keys)
Example #17
def _write_csv(input_files, results, csv_path, base64_outputfiles=False):
    """
    Writes out results as a csv.

    :param input_files: List of filenames for each respective metadata.
    :param results: List of metadata dictionaries.
    :param csv_path: Path to write out csv file.
    :param base64_outputfiles: Whether to include base64 outputfiles.
    :raises IOError: If csv could not be written out.
    """
    scan_date = time.ctime()

    # Add/Tweak metadata.
    for inputfilename, metadata in zip(input_files, results):
        # Add scan date.
        metadata['scan_date'] = scan_date
        if 'inputfilename' not in metadata:
            metadata['inputfilename'] = inputfilename

        # Flatten 'other' entry so nested values get their own columns,
        # are more readable, and easier to individually analyze.
        #
        # Example:
        #   {'other': {"unique_entry": "value", "unique_key": "value2"}}
        #   Results in columns: other, other.unique_entry, other.unique_key
        if 'other' in metadata:
            for sub_key, sub_value in metadata['other'].items():
                metadata['other.{}'.format(convert_to_unicode(sub_key))] = sub_value
            del metadata['other']

        # Split outputfile into multiple fields.
        if 'outputfile' in metadata:
            value = list(zip(*metadata['outputfile']))
            metadata['outputfile.name'] = value[0]
            metadata['outputfile.description'] = value[1]
            metadata['outputfile.md5'] = value[2]
            if len(value) > 3 and base64_outputfiles:
                metadata['outputfile.base64'] = value[3]
            del metadata['outputfile']

    # Sort columns, but with _STD_CSV_COLUMNS showing up first.
    column_names = set(itertools.chain(*(metadata.keys() for metadata in results)))
    column_names = sorted(
        column_names,
        key=lambda x: (0, _STD_CSV_COLUMNS.index(x)) if x in _STD_CSV_COLUMNS
        else (1, x))

    # Reformat metadata and write to CSV
    with open(csv_path, 'wb' if sys.version_info.major < 3 else 'w') as csvfile:
        dw = csv.DictWriter(csvfile, fieldnames=column_names, lineterminator='\n')
        dw.writeheader()
        for metadata in results:
            dw.writerow({k: _format_metadata_value(v) for k, v in metadata.items()})
Example #18
 def test_count_multi_by_record(self):
     counts_by_record_left = [utils.counts(record, 8) for record in utils.SEQUENCES_LEFT]
     counts_by_record_right = [utils.counts(record, 8) for record in utils.SEQUENCES_RIGHT]
     names_left = [str(i) for i, _ in enumerate(counts_by_record_left)]
     names_right = [str(i) for i, _ in enumerate(counts_by_record_right)]
     filename = self.empty()
     with open(self.fasta(utils.SEQUENCES_LEFT, names=names_left)) as handle_left:
         with open(self.fasta(utils.SEQUENCES_RIGHT, names=names_right)) as handle_right:
             with utils.open_profile(filename, 'w') as profile_handle:
                 kmer.count([handle_left, handle_right], profile_handle, 8, names=['a', 'b'], by_record=True)
     for name, counts in zip(names_left, counts_by_record_left):
         utils.test_profile_file(filename, counts, 8, name='a_' + name)
     for name, counts in zip(names_right, counts_by_record_right):
         utils.test_profile_file(filename, counts, 8, name='b_' + name)
Example #19
    def test_read(originals, keys):

        if columns is not None:
            originals = [o[:, columns] for o in originals]

        # test read, one by one
        with jagged_raw() as jr:
            for original, key in zip(originals, keys):
                roundtripped = jr.get([key], columns=columns)[0]
                assert np.allclose(original, roundtripped)

        # test read, in a batch
        with jagged_raw() as jr:
            for original, roundtripped in zip(originals, jr.get(keys, columns=columns)):
                assert np.allclose(original, roundtripped)
Example #20
    def __eq__(self, other):
        r"""The equality operator.

        Parameters
        ----------
        other : `SequenceCollection`
            The `SequenceCollection` to test for equality against.

        Returns
        -------
        bool
            Indicates whether `self` and `other` are equal.

        Notes
        -----
        `SequenceCollection` objects are equal if they are the same type,
        contain the same number of sequences, and if each of the
        `skbio.core.sequence.BiologicalSequence` objects, in order, are equal.

        .. shownumpydoc

        """
        if self.__class__ != other.__class__:
            return False
        elif len(self) != len(other):
            return False
        else:
            for self_seq, other_seq in zip(self, other):
                if self_seq != other_seq:
                    return False
        return True
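Because zip stops at the shorter input, the element-wise loop above would on
its own report a collection as equal to a longer one sharing its prefix; the
explicit length check rules that out. A minimal sketch of the truncation
behavior:

a, b = [1, 2, 3], [1, 2]

# zip silently truncates to the shorter input ...
assert list(zip(a, b)) == [(1, 1), (2, 2)]

# ... so pairwise comparison alone passes even though the lengths differ,
# which is why __eq__ above checks len() first.
assert all(x == y for x, y in zip(a, b)) and len(a) != len(b)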
Example #21
File: kmer.py Project: LUMC/kPAL
def count(input_handles, output_handle, size, names=None, by_record=False):
    """
    Make k-mer profiles from FASTA files.

    :arg input_handles: Open readable FASTA file handles.
    :type input_handles: list(file-like object)
    :arg output_handle: Open writeable k-mer profile file handle.
    :type output_handle: h5py.File
    :arg size: Size of k.
    :type size: int
    :arg names: Optional list of names for the created k-mer profiles (must
      have the same length as `input_handles`). If not provided, profiles are
      named according to the input filenames, or numbered consecutively from 1
      if no filenames are available.
    :type names: list(str)
    :arg bool by_record: If `True`, make a k-mer profile per FASTA record
      instead of a k-mer profile per FASTA file. Profiles are named by the
      record names and prefixed according to `names` if more than one FASTA
      file is given).
    """
    names = names or [_name_from_handle(h) for h in input_handles]

    if len(names) != len(input_handles):
        raise ValueError(NAMES_COUNT_ERROR)

    for input_handle, name in zip(input_handles, names):
        if by_record:
            prefix = name if len(input_handles) > 1 else None
            profiles = klib.Profile.from_fasta_by_record(input_handle, size,
                                                         prefix=prefix)
        else:
            profiles = [klib.Profile.from_fasta(input_handle, size, name=name)]

        for profile in profiles:
            profile.save(output_handle)
Example #22
def stylable_result_headers(cl):
    """
    Reuse the existing result_headers() iterator, adding a `col-FIELD_NAME`
    class and the field name to each header to assist JavaScript.
    cl = The django ChangeList object
    """
    for field_name, header in zip(cl.list_display, result_headers(cl)):
        header['field_name'] = field_name  # For JavaScript

        if header.get('class_attrib'):
            # Remove any sorting marker for mptt tables, because they are not sortable.
            if hasattr(cl.model, '_mptt_meta'):
                header['class_attrib'] = header['class_attrib'].replace(
                    'sortable', '').replace('sorted',
                                            '').replace('ascending', '')

            header['class_attrib'] = mark_safe(header['class_attrib'].replace(
                'class="', 'class="col-%s ' % field_name))
        else:
            header['class_attrib'] = mark_safe(' class="col-%s"' % field_name)

        if 'url_primary' in header and 'url' not in header:
            header['url'] = header[
                'url_primary']  # Django 1.3 template compatibility.

        yield header
Example #23
def plot_mags(result, fname=None, title=None, xlim=None, ylim=None,
              figsize=None):
    fig = plt.figure(figsize=figsize)
    ax = fig.add_subplot(111)
    temp = [(r['Mcat'], r['Mw']) for r in result['events'].values()
            if r.get('Mcat') is not None and r.get('Mw') is not None]
    if len(temp) == 0:
        return
    Mcat, Mw = zip(*temp)
    ax.plot(Mcat, Mw, 'ok', ms=MS)
    if xlim is not None:
        mmin, mmax = xlim
    else:
        mmin, mmax = np.min(Mcat), np.max(Mcat)
    m = np.linspace(mmin, mmax, 100)

    if len(Mw) > 2:
        a, b = linear_fit(Mw, Mcat)
        ax.plot(m, a * m + b, '-m', label='%.2fM %+.2f' % (a, b))
    if len(Mw) > 3:
        _, b2 = linear_fit(Mw, Mcat, m=1)
        ax.plot(m, m + b2, '--m', label='M %+.2f' % (b2,))
    if len(Mw) > 2:
        ax.legend(loc='lower right')
    if xlim:
        ax.set_xlim(xlim)
    ax.set_xlabel('M from catalog')
    ax.set_ylabel('Mw from inversion')
    _savefig(fig, fname=fname, title=title)
Example #24
    def __eq__(self, other):
        r"""The equality operator.

        Parameters
        ----------
        other : `SequenceCollection`
            The `SequenceCollection` to test for equality against.

        Returns
        -------
        bool
            Indicates whether `self` and `other` are equal.

        Notes
        -----
        `SequenceCollection` objects are equal if they are the same type,
        contain the same number of sequences, and if each of the
        `skbio.core.sequence.BiologicalSequence` objects, in order, are equal.

        .. shownumpydoc

        """
        if self.__class__ != other.__class__:
            return False
        elif len(self) != len(other):
            return False
        else:
            for self_seq, other_seq in zip(self, other):
                if self_seq != other_seq:
                    return False
        return True
Example #25
    def coerce_zadd_args(*args, **kwargs):
        """
        Take the arguments accepted by a zadd call, named or not, and return a flat list
        that can be used.
        A callback can be called with all "values" (as *args) if defined as the
        `values_callback` named argument. Real values will then be the result of
        this callback.
        """
        values_callback = kwargs.pop('values_callback', None)

        pieces = []
        if args:
            if len(args) % 2 != 0:
                raise RedisError("ZADD requires an equal number of "
                                 "values and scores")
            pieces.extend(args)

        for pair in iteritems(kwargs):
            pieces.append(pair[1])
            pieces.append(pair[0])

        values = pieces[1::2]
        if values_callback:
            values = values_callback(*values)

        scores = pieces[0::2]

        pieces = []
        for z in zip(scores, values):
            pieces.extend(z)

        return pieces
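The final loop rebuilds the flat alternating [score, value, score, value, ...]
list by extending with each zip pair. A minimal sketch of that interleaving
step:

scores = [1.0, 2.0]
values = ['a', 'b']

pieces = []
for pair in zip(scores, values):
    pieces.extend(pair)

assert pieces == [1.0, 'a', 2.0, 'b']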
Example #26
    def test_data(self):
        for dm, exp in zip(self.dms, self.dm_redundant_forms):
            obs = dm.data
            self.assertTrue(np.array_equal(obs, exp))

        with self.assertRaises(AttributeError):
            self.dm_3x3.data = 'foo'
Example #27
            def wrapped_reader(fp, mode='U', mutate_fh=False, **kwargs):
                file_keys = []
                files = [fp]
                for file_arg in file_args:
                    if file_arg in kwargs:
                        if kwargs[file_arg] is not None:
                            file_keys.append(file_arg)
                            files.append(kwargs[file_arg])
                    else:
                        kwargs[file_arg] = None

                with open_files(files, mode) as fhs:
                    try:
                        for key, fh in zip(file_keys, fhs[1:]):
                            kwargs[key] = fh

                        generator = reader(fhs[0], **kwargs)
                        if not isinstance(generator, types.GeneratorType):
                            # Raise an exception to be handled next line,
                            # because although reader executed without error,
                            # it is not a generator.
                            raise Exception()
                    # If an exception is thrown at this point, it cannot
                    # be a generator. If there was a `yield` statement, then
                    # Python would have returned a generator regardless of the
                    # content. This does not preclude the generator from
                    # throwing exceptions.
                    except Exception:
                        raise InvalidRegistrationError("'%s' is not a "
                                                       "generator." %
                                                       reader.__name__)

                    # `yield from` re-yields the generator without leaking
                    # StopIteration (a RuntimeError under PEP 479).
                    yield from generator
Example #28
    def test_valid_files(self):
        for valid, kwargs, components in self.valid_files:
            for kwarg in kwargs:
                _drop_kwargs(kwarg, 'seq_num')
                constructor = kwarg.get('constructor', Sequence)
                expected = [
                    constructor(
                        c['sequence'],
                        metadata={'id': c['id'],
                                  'machine_name': c['machine_name'],
                                  'run_number': c['run_number'],
                                  'lane_number': c['lane_number'],
                                  'tile_number': c['tile_number'],
                                  'x': c['x'],
                                  'y': c['y'],
                                  'index': c['index'],
                                  'read_number': c['read_number']},
                        positional_metadata={
                            'quality': np.array(c['quality'], dtype=np.uint8)})
                    for c in components]

                observed = list(_qseq_to_generator(valid, **kwarg))
                self.assertEqual(len(expected), len(observed))
                for o, e in zip(observed, expected):
                    self.assertEqual(o, e)
Example #29
    def test_fasta_to_sequence_collection_and_alignment(self):
        test_cases = (self.empty, self.single,
                      self.sequence_collection_different_type,
                      self.lowercase_seqs)

        for constructor, reader_fn in ((SequenceCollection,
                                        _fasta_to_sequence_collection),
                                       (Alignment,
                                        _fasta_to_alignment)):
            # see comment in test_fasta_to_generator_valid_files (above) for
            # testing strategy
            for exp_list, kwargs, fasta_fps, qual_fps in test_cases:
                exp = constructor(exp_list)

                for fasta_fp in fasta_fps:
                    obs = reader_fn(fasta_fp, **kwargs)

                    self.assertEqual(len(obs), len(exp))
                    for o, e in zip(obs, exp):
                        e = e.copy()
                        del e.positional_metadata['quality']
                        self.assertEqual(o, e)

                    for qual_fp in qual_fps:
                        obs = reader_fn(fasta_fp, qual=qual_fp, **kwargs)
                        self.assertEqual(obs, exp)
Example #30
def cat(input_handles, output_handle, names=None, prefixes=None):
    """
    Save k-mer profiles from several files to one k-mer profile file.

    :arg input_handles: Open readable k-mer profile file handles.
    :type input_handles: list(file-like object)
    :arg output_handle: Open writeable k-mer profile file handle.
    :type output_handle: h5py.File
    :arg names: Optional list of names of the k-mer profiles to consider. If
      not provided, all profiles in all input files are considered.
    :type names: list(str)
    :arg prefixes: Optional list of prefixes to use for the saved k-mer
      profile names, one per input file. If not provided, profile names are
      assumed to be disjoint and no prefix is used.
    :type prefixes: list(str)
    """
    prefixes = prefixes or ['' for _ in input_handles]

    if len(prefixes) != len(input_handles):
        raise ValueError(PREFIX_COUNT_ERROR)

    for input_handle, prefix in zip(input_handles, prefixes):
        names_ = names or sorted(input_handle['profiles'])

        for name in names_:
            try:
                profile = klib.Profile.from_file(input_handle, name=name)
            except KeyError:
                # In this specific case, we ignore non-existing profiles,
                # since the user may have specified them for selecting from
                # one of the other input files.
                continue
            profile.save(output_handle, name=prefix + name)
Example #31
    def to_file(self, out_f, delimiter='\t'):
        """Save the dissimilarity matrix to file in delimited text format.

        Parameters
        ----------
        out_f : file-like object or filename
            File-like object to write serialized data to, or name of
            file. If it's a file-like object, it must have a ``write``
            method, and it won't be closed. Else, it is opened and
            closed after writing.
        delimiter : str, optional
            Delimiter used to separate elements in output format.

        See Also
        --------
        from_file

        """
        with open_file(out_f, 'w') as out_f:
            formatted_ids = self._format_ids(delimiter)
            out_f.write(formatted_ids)
            out_f.write('\n')

            for id_, vals in zip(self.ids, self.data):
                out_f.write(id_)
                out_f.write(delimiter)
                out_f.write(delimiter.join(np.asarray(vals, dtype=str)))
                out_f.write('\n')
Example #32
 def rename(self, target):
     """Return renamed contig line."""
     for rx, so in zip(self.regex, self.soterm):
         match = rx.search(target)
         if match is None:
             continue
         match = match.groupdict()
         number = match.get('number', None)
         roman_num = match.get('roman', None)
         letter = match.get('letter', None)
         if number is not None:
             contig = self._format_number(number)
         elif roman_num is not None:
             if roman_num.isdigit():
                 contig = roman.roman_from_int(int(roman_num))
             else:
                 contig = roman_num
         elif letter is not None:
             contig = letter.upper()
         else:
             raise Exception("Regex {} doesn't contain a number, letter, "
                             "or roman numeral field.".format(rx.pattern))
         return "{}_{}{}".format(self.abbrev, so, contig)
     raise NoMatchException("Doesn't match any regular expression: "
                            "{}".format(target))
Example #33
    def _setup_resources(self, requests, resources, force_initialize,
                         enable_debug, skip_init):
        """Prepare the resources for work.

        Iterates over the resources and tries to prepare them for
        work by validating, resetting and initializing them.

        The locked and initialized resources are yielded instead of returned
        as a list so in case one got an exception in initialization, the
        user would know which resources were already initialized.

        Args:
            requests (tuple): list of the ResourceRequest.
            resources (list): list of the resources instances.
            force_initialize (bool): determines if the resources will be
                initialized even if their validation succeeds.
            enable_debug (bool): True to wrap the resource's method with debug.
            skip_init (bool): True to skip initialization and validation.

        Yields:
            tuple. pairs of locked and initialized resources (name, resource).

        Raises:
            ServerError. resource manager failed to lock resources.
        """
        self.logger.debug("Setting up the locked resources")

        for resource, request in zip(resources, requests):
            if enable_debug:
                resource.enable_debug()

            resource.setup_resource(skip_init=skip_init,
                                    force_initialize=force_initialize)

            yield (request.name, resource)
Example #34
 def test_transpose(self):
     for dm, transpose in zip(self.dms, self.dm_transposes):
         self.assertEqual(dm.T, transpose)
         self.assertEqual(dm.transpose(), transpose)
         # We should get a reference to a different object back, even if the
         # transpose is the same as the original.
         self.assertTrue(dm.transpose() is not dm)
Example #35
File: kmer.py Project: yimsea/kPAL
def smooth(input_handle_left,
           input_handle_right,
           output_handle_left,
           output_handle_right,
           names_left=None,
           names_right=None,
           summary='min',
           custom_summary=None,
           threshold=0):
    """
    Smooth two profiles by collapsing sub-profiles. If the files contain more
    than one profile, they are linked by name and processed pairwise.

    :arg h5py.File input_handle_left, input_handle_right: Open readable k-mer
      profile handle.
    :arg h5py.File output_handle_left, output_handle_right: Open writeable
      k-mer profile file handle.
    :arg list(str) names_left, names_right: Optional list of names of the
      k-mer profiles to consider. If not provided, all profiles in the file
      are considered.
    :arg str summary: Name of the summary function.
    :arg str custom_summary: Custom summary function.
    :arg int threshold: Threshold for the summary function.
    """
    names_left = names_left or sorted(input_handle_left['profiles'])
    names_right = names_right or sorted(input_handle_right['profiles'])

    if len(names_left) != len(names_right):
        raise ValueError(PAIRED_NAMES_COUNT_ERROR)

    if custom_summary:
        if re.match(_PYTHON_IMPORTABLE, custom_summary):
            # Importable definition, e.g. `package.module.summary_function`.
            module, name = custom_summary.rsplit('.', 1)
            summary_function = getattr(importlib.import_module(module), name)
        else:
            # Expression over `values`, e.g. `np.max(values)`. The `numpy`
            # package is available as `np`.
            summary_function = eval('lambda values: ' + custom_summary,
                                    {'np': np})
    else:
        summary_function = metrics.summary[summary]

    dist = kdistlib.ProfileDistance(summary=summary_function,
                                    threshold=threshold)

    for name_left, name_right in zip(names_left, names_right):
        profile_left = klib.Profile.from_file(input_handle_left,
                                              name=name_left)
        profile_right = klib.Profile.from_file(input_handle_right,
                                               name=name_right)

        if profile_left.length != profile_right.length:
            raise ValueError(LENGTH_ERROR)

        dist.dynamic_smooth(profile_left, profile_right)

        profile_left.save(output_handle_left)
        profile_right.save(output_handle_right)
Example #36
    def test_results_of_tutorial(self):
        """Test against publication of Sens-Schoenfelder and Wegler (2006)"""
        plot = PLOT
        freq = [0.1875, 0.375, 0.75, 1.5, 3.0, 6.0, 12.0, 24.0]  # page 1365
        g0 = [2e-6, 2e-6, 1e-6, 1e-6, 1e-6, 1e-6, 1.5e-6, 2e-6]  # fig 4
        Qi = [2e-3, 2e-3, 1.8e-3, 2e-3, 1.5e-3, 1e-3, 5e-4, 2e-4]  # fig 5
        freq = np.array(freq)
        g0 = np.array(g0)
        b = np.array(Qi) * (2 * np.pi * np.array(freq))
        M0 = {'20010623': 5.4e14, '20020722': 4.1e15, '20030222': 1.5e16,
              '20030322': 8.8e14, '20041205': 6.8e15}  # table 1
        kwargs = {
            'plot_energies': plot, 'plot_optimization': plot,
            'plot_fits': plot, 'plot_eventresult': plot,
            'plot_eventsites': plot, 'plot_results': plot,
            'plot_sites': plot, 'plot_sds': plot, 'plot_mags': plot
        }
        ind = np.logical_and(freq > 0.3, freq < 10)
        freq = freq[ind]
        g0 = np.array(g0)[ind]
        b = np.array(b)[ind]
        with tempdir(delete=not plot):
            run(create_config='conf.json', tutorial=True)
            result = run(conf='conf.json', **kwargs)
            if plot:
                plot_comparison(result['freq'], freq, result['g0'], g0,
                                result['b'], b)
        M0_qopen = {evid.split('_')[0]: r.get('M0')
                    for evid, r in result['events'].items()}
        temp = [(M0_qopen[evid], M0[evid]) for evid in sorted(M0)]
        M0_qopen, M0 = zip(*temp)
        M0 = np.array(M0) / 2 ** 0.5  # wrong surface correction in paper
        # There seems to be a wrong factor of around 1e4 in the observed
        # envelopes (fig. 3). The error could be in the paper or the script.
        # Therefore M0 should differ by a factor of 1e2, but the values
        # actually differ by a factor of 10, corresponding to a magnitude
        # difference of 0.67. The magnitude comparison mags.pdf suggests
        # that the M0s determined by the script are ok.
        M0 = 10 * M0

#        np.set_printoptions(formatter={'all':lambda x: '%.2g' % x})
#        print('g0 test vs paper')
#        print(np.array(result['g0']))
#        print(g0)
#        print('b test vs paper')
#        print(np.array(result['b']))
#        print(b)
#        print('M0 test vs paper')
#        print(np.array(M0_qopen))
#        print(M0)
#        plot_comparison(result['freq'], freq, result['g0'], g0, result['b'],b)

        np.testing.assert_equal(result['freq'], freq)
#        print(np.log10(result['g0'] / g0))
#        print(np.log10(result['b'] / b))
#        print(np.log10(M0_qopen / M0))
        np.testing.assert_array_less(np.abs(np.log10(result['g0'] / g0)), 0.5)
        np.testing.assert_array_less(np.abs(np.log10(result['b'] / b)), 0.5)
        np.testing.assert_array_less(np.abs(np.log10(M0_qopen / M0)), 0.51)
Example #37
    def ParseFiles(self, knowledge_base, pathspecs, filedescs):
        del knowledge_base  # Unused.

        fileset = {
            pathspec.path: obj
            for pathspec, obj in zip(pathspecs, filedescs)
        }
        return self.ParseFileset(fileset)
Example no. 38
0
 def _Combine(self, pathspecs, file_objects):
     result = {}
     for pathspec, file_object in zip(pathspecs, file_objects):
         path = pathspec.path
         file_object.seek(0)
         contents = utils.ReadFileBytesAsUnicode(file_object)
         result[path] = contents
     return result
Example no. 39
0
 def test_transpose(self):
     """Test retrieving transpose of dissimilarity matrix."""
     for dm, transpose in zip(self.dms, self.dm_transposes):
         self.assertEqual(dm.T, transpose)
         self.assertEqual(dm.transpose(), transpose)
         # We should get a reference to a different object back, even if the
         # transpose is the same as the original.
         self.assertTrue(dm.transpose() is not dm)
Example no. 40
0
    def test_data(self):
        """Test retrieving/setting data matrix."""
        for dm, exp in zip(self.dms, self.dm_redundant_forms):
            obs = dm.data
            self.assertTrue(np.array_equal(obs, exp))

        with self.assertRaises(AttributeError):
            self.dm_3x3.data = 'foo'
Example no. 41
0
    def test_cosine_similarity(self):
        # np.random.random_integers is deprecated; np.random.randint's upper
        # bound is exclusive, hence 101 for values in [1, 100].
        a = np.random.randint(1, 101, 100)
        b = np.random.randint(1, 101, 100)

        cs = sum(x * y for x, y in zip(a, b)) / (
            math.sqrt(sum(x * x for x in a)) *
            math.sqrt(sum(x * x for x in b)))
        np.testing.assert_almost_equal(metrics.cosine_similarity(a, b),
                                       cs)
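
For reference, the hand-rolled expression above should agree with the
vectorised form below; a minimal sketch, independent of the test suite:

import numpy as np

a = np.random.randint(1, 101, 100)
b = np.random.randint(1, 101, 100)

# Cosine similarity: dot(a, b) / (||a|| * ||b||)
cs = np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))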
Example no. 42
0
    def test_multiset_many_zeros(self):
        # np.random.random_integers is deprecated; np.random.randint's upper
        # bound is exclusive, hence 3 for values in [0, 2].
        a = np.random.randint(0, 3, 100)
        b = np.random.randint(0, 3, 100)
        pairwise = metrics.pairwise['prod']

        values = [pairwise(i, j) for i, j in zip(a, b) if i or j]
        np.testing.assert_almost_equal(metrics.multiset(a, b, pairwise),
                                       sum(values) / (len(values) + 1))
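
Assuming the 'prod' pairwise function is plain multiplication, the expected
value can also be computed with boolean masking; a sketch of the same
arithmetic under that assumption:

import numpy as np

a = np.random.randint(0, 3, 100)
b = np.random.randint(0, 3, 100)

# Keep only positions where at least one count is nonzero.
mask = (a > 0) | (b > 0)
values = a[mask] * b[mask]                  # pairwise 'prod' on kept positions
expected = values.sum() / (len(values) + 1)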
Example no. 43
0
 def wrapped_reader(file, encoding=self._encoding,
                    newline=self._newline, **kwargs):
     file_keys, files, io_kwargs = self._setup_locals(
         file_params, file, encoding, newline, kwargs)
     with open_files(files, mode='r', **io_kwargs) as fhs:
         kwargs.update(zip(file_keys, fhs[:-1]))
         for item in reader_function(fhs[-1], **kwargs):
             yield item
Example no. 44
0
def zip_t(zip_sequence):
    """
    Transformation for Sequence.zip
    :param zip_sequence: sequence to zip with
    :return: transformation
    """
    return Transformation('zip(<sequence>)',
                          lambda sequence: zip(sequence, zip_sequence), None)
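
The lambda above simply delegates to the built-in zip, which truncates at the
shorter input; for example:

>>> list(zip([1, 2, 3], 'ab'))
[(1, 'a'), (2, 'b')]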
Example no. 45
0
    def calpoints(self):
        """The calibration points, in (ch, kev) pairs.

        Returns:
          a list of 2-element tuples of (channel, energy[keV])
        """

        return list(zip(self.channels, self.energies))
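
A standalone sketch of the pairing performed by calpoints, with hypothetical
channel and energy values:

channels = [10, 20, 30]
energies = [59.5, 122.1, 661.7]  # keV (hypothetical calibration)
print(list(zip(channels, energies)))
# -> [(10, 59.5), (20, 122.1), (30, 661.7)]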
Example no. 46
0
    def test_fasta_to_pairlist(self):
        """Correctly returns a list of (seq, label)"""
        exp = [("AC", "sid_0"), ("ACAGTC", "sid_1"), ("ACTA", "sid_2"),
               ("CAGT", "sid_3"), ("CATGAA", "sid_4"), ("A", "sid_5"),
               ("CATGTA", "sid_6"), ("CAA", "sid_7"), ("CACCA", "sid_8")]

        for obs, expected in zip(fasta_to_pairlist(self.seqs), exp):
            self.assertEqual(obs, expected)
Example no. 47
0
    def test_empty_str_replacement(self):
        exp = [('', 'ACGT', range(4)), ('foobar', 'GAU', None),
               (' foo bar', 'TAG', None), ('foo bar baz', 'A', [42])]
        obs = list(_format_fasta_like_records(self.gen, '', '', False))

        self.assertEqual(len(obs), len(exp))
        for o, e in zip(obs, exp):
            npt.assert_equal(o, e)
Example no. 48
0
def zip_with_index_t(start):
    """
    Transformation for Sequence.zip_with_index
    :return: transformation
    """
    return Transformation('zip_with_index',
                          lambda sequence: zip(sequence, count(start=start)),
                          None)
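
itertools.count supplies an unbounded index stream, so the zip stops when the
sequence itself is exhausted; for example:

>>> from itertools import count
>>> list(zip('abc', count(start=5)))
[('a', 5), ('b', 6), ('c', 7)]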
Example no. 49
0
    def __init__(self, *args, **kwargs):
        """
        Handles adding a variation to the cart or wishlist.

        When adding from the product page, the product is provided
        from the view and a set of choice fields for all the
        product options for this product's variations are added to
        the form. When the form is validated, the selected options
        are used to determine the chosen variation.

        A ``to_cart`` boolean keyword arg is also given specifying
        whether the product is being added to a cart or wishlist.
        If a product is being added to the cart, then its stock
        level is also validated.

        When adding to the cart from the wishlist page, a sku is
        given for the variation, so the creation of choice fields
        is skipped.
        """
        self._product = kwargs.pop("product", None)
        self._to_cart = kwargs.pop("to_cart")
        super(AddProductForm, self).__init__(*args, **kwargs)
        # Adding from the wishlist with a sku, bail out.
        if args[0] is not None and args[0].get("sku", None):
            return
        # Adding from the product page, remove the sku field
        # and build the choice fields for the variations.
        del self.fields["sku"]
        option_fields = ProductVariation.option_fields()
        if not option_fields:
            return
        option_names, option_labels = list(zip(*[(f.name, f.verbose_name)
            for f in option_fields]))
        option_values = list(zip(*self._product.variations.filter(
            unit_price__isnull=False).values_list(*option_names)))
        if option_values:
            for i, name in enumerate(option_names):
                values = [_f for _f in set(option_values[i]) if _f]
                if values:
                    field = forms.ChoiceField(label=option_labels[i],
                                              choices=make_choices(values),
                                              widget=forms.RadioSelect())
                    self.fields[name] = field
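
The option handling above relies on the unzip idiom zip(*iterable), which
turns a list of pairs into parallel tuples; a standalone sketch with
hypothetical option fields:

pairs = [('size', 'Size'), ('colour', 'Colour')]
names, labels = zip(*pairs)
# names  -> ('size', 'colour')
# labels -> ('Size', 'Colour')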
Example no. 50
0
def _qseq_sniffer(fh):
    empty = True
    try:
        for _, line in zip(range(10), fh):
            _record_parser(line)
            empty = False
        return not empty, {}
    except QSeqFormatError:
        return False, {}
Example no. 51
0
 def test_count_by_record(self):
     counts_by_record = [utils.counts(record, 8) for record in utils.SEQUENCES]
     names = [str(i) for i, _ in enumerate(counts_by_record)]
     filename = self.empty()
     with open(self.fasta(utils.SEQUENCES, names=names)) as fasta_handle:
         with utils.open_profile(filename, 'w') as profile_handle:
             kmer.count([fasta_handle], profile_handle, 8, by_record=True)
     for name, counts in zip(names, counts_by_record):
         utils.test_profile_file(filename, counts, 8, name=name)
Example no. 52
0
    def ParseFiles(self, knowledge_base, pathspecs, filedescs):
        del knowledge_base  # Unused.

        # Each file gives us only partial information for a particular PCI device.
        # Iterate through all the files first to create a dictionary encapsulating
        # complete information for each of the PCI devices on the system. We need
        # all information for a PCI device before a proto for it can be created.
        # We will store data in a dictionary of dictionaries that looks like this:
        # data = { '0000:7f:0d.0': { 'class': '0x088000',
        #                            'vendor': '0x8086',
        #                            'device': '0x0ee1' } }
        # The key is location of PCI device on system in extended B/D/F notation
        # and value is a dictionary containing filename:data pairs for each file
        # returned by artifact collection for that PCI device.

        # Extended B/D/F is of form "domain:bus:device.function". Compile a regex
        # so we can use it to skip parsing files that don't match it.
        hc = r"[0-9A-Fa-f]"
        bdf_regex = re.compile(r"^%s+:%s+:%s+\.%s+" % (hc, hc, hc, hc))

        # This will make sure that when a non-existing 'key' (PCI location)
        # is accessed for the first time a new 'key':{} pair is auto-created
        data = collections.defaultdict(dict)

        for pathspec, file_obj in zip(pathspecs, filedescs):
            filename = pathspec.Basename()
            # The location of the PCI device is the name of the parent
            # directory of the returned file.
            bdf = pathspec.Dirname().Basename()

            # Make sure we only parse files that are under a valid B/D/F folder
            if bdf_regex.match(bdf):
                # Remove newlines from all files except config. Config contains raw data
                # so we don't want to touch it even if it has a newline character.
                file_data = file_obj.read()
                if filename != "config":
                    file_data = file_data.rstrip(b"\n")
                data[bdf][filename] = file_data

        # Now that we've captured all information for each PCI device, convert
        # the dictionary into a list of PCIDevice protos.
        for bdf, bdf_filedata in iteritems(data):
            pci_device = rdf_client.PCIDevice()
            bdf_split = bdf.split(":")
            df_split = bdf_split[2].split(".")

            # We'll convert the hex into decimal to store in the protobuf.
            pci_device.domain = int(bdf_split[0], 16)
            pci_device.bus = int(bdf_split[1], 16)
            pci_device.device = int(df_split[0], 16)
            pci_device.function = int(df_split[1], 16)

            pci_device.class_id = bdf_filedata.get("class")
            pci_device.vendor = bdf_filedata.get("vendor")
            pci_device.vendor_device_id = bdf_filedata.get("device")
            pci_device.config = bdf_filedata.get("config")

            yield pci_device
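
A standalone sketch of the B/D/F handling above, showing how an extended
"domain:bus:device.function" string is validated and converted to decimal
(the device location is hypothetical):

import re

hc = r"[0-9A-Fa-f]"
bdf_regex = re.compile(r"^%s+:%s+:%s+\.%s+" % (hc, hc, hc, hc))

bdf = "0000:7f:0d.0"  # hypothetical PCI device location
if bdf_regex.match(bdf):
    domain, bus, dev_fn = bdf.split(":")
    device, function = dev_fn.split(".")
    print(int(domain, 16), int(bus, 16), int(device, 16), int(function, 16))
    # -> 0 127 13 0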
Example no. 53
0
 def wrapped_reader(file, encoding=self._encoding,
                    newline=self._newline, **kwargs):
     file_keys, files, io_kwargs = self._setup_locals(
         file_params, file, encoding, newline, kwargs)
     with open_files(files, mode='r', **io_kwargs) as fhs:
         # The primary file is at the end of fhs because append
         # is cheaper than insert.
         kwargs.update(zip(file_keys, fhs[:-1]))
         return reader_function(fhs[-1], **kwargs)
Example no. 54
0
 def model(self):
     """
     The polygon model drawn as :class:`fatiando.mesher.Polygon` objects.
     """
     m = [
         Polygon(p.xy, {'density': d})
         for p, d in zip(self.polygons, self.densities)
     ]
     return m
Example no. 55
0
File: kmer.py Project: yimsea/kPAL
def merge(input_handle_left,
          input_handle_right,
          output_handle,
          names_left=None,
          names_right=None,
          merger='sum',
          custom_merger=None):
    """
    Merge k-mer profiles. If the files contain more than one profile, they are
    linked by name and merged pairwise. The resulting profile name is set to
    that of the original profiles if they match, or to their concatenation
    otherwise.

    :arg h5py.File input_handle_left, input_handle_right: Open readable k-mer
      profile file handles.
    :arg h5py.File output_handle: Open writeable k-mer profile file handle.
    :arg list(str) names_left, names_right: Optional list of names of the
      k-mer profiles to consider. If not provided, all profiles in the file
      are considered.
    :arg function merger: Merge function.
    :arg str custom_merger: Custom merge function.
    """
    names_left = names_left or sorted(input_handle_left['profiles'])
    names_right = names_right or sorted(input_handle_right['profiles'])

    if len(names_left) != len(names_right):
        raise ValueError(PAIRED_NAMES_COUNT_ERROR)

    if custom_merger:
        if re.match(_PYTHON_IMPORTABLE, custom_merger):
            # Importable definition, e.g. `package.module.merge_function`.
            module, name = custom_merger.rsplit('.', 1)
            merge_function = getattr(importlib.import_module(module), name)
        else:
            # Expression over `left` and `right`, e.g. `np.add(left, right)`.
            # The `numpy` package is available as `np`.
            merge_function = eval('lambda left, right: ' + custom_merger,
                                  {'np': np})
    else:
        merge_function = metrics.mergers[merger]

    for name_left, name_right in zip(names_left, names_right):
        profile_left = klib.Profile.from_file(input_handle_left,
                                              name=name_left)
        profile_right = klib.Profile.from_file(input_handle_right,
                                               name=name_right)

        if profile_left.length != profile_right.length:
            raise ValueError(LENGTH_ERROR)

        profile_right.merge(profile_left, merge_function)

        if name_left == name_right:
            name = name_left
        else:
            name = name_left + '_' + name_right
        profile_right.save(output_handle, name=name)
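
A hedged sketch of the expression path for custom_merger above: the string is
compiled into a two-argument lambda with numpy available as np, exactly as in
merge (the expression itself is hypothetical):

import numpy as np

custom_merger = 'np.maximum(left, right)'  # hypothetical expression
merge_function = eval('lambda left, right: ' + custom_merger, {'np': np})

print(merge_function(np.array([1, 5, 2]), np.array([4, 3, 6])))  # -> [4 5 6]

As with the original, eval should only ever be fed trusted expressions.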
Example no. 56
0
 def test_nacl(self):
     master_secret = 'No, I am your father'.encode()
     keys_1 = hkdf(master_secret, 5, salt=b'and pepper')
     keys_2 = hkdf(master_secret, 5, salt=b'and vinegar')
     for k1, k2 in zip(keys_1, keys_2):
         self.assertNotEqual(k1,
                             k2,
                             msg='using different salts should result in '
                             'different keys')
Example no. 57
0
 def _add_bonds(self):
     bonds = []
     # Pair each C-alpha index with its successor to form backbone bonds.
     bonds.extend([
         bond for s in self.segments for bond in zip(
             s.atoms.select_atoms("calpha").ix,
             s.atoms.select_atoms("calpha").ix[1:])
     ])
     self._topology.add_TopologyAttr(topologyattrs.Bonds(bonds))
     self._generate_from_topology()
Example no. 58
0
 def _add_to_cart(self, variation, quantity):
     """
     Given a variation, creates the dict for posting to the cart
     form to add the variation, and posts it.
     """
     field_names = [f.name for f in ProductVariation.option_fields()]
     data = dict(list(zip(field_names, variation.options())))
     data["quantity"] = quantity
     self.client.post(variation.product.get_absolute_url(), data)
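
dict(zip(keys, values)) pairs two parallel sequences into a mapping, as used
for the post data above; a sketch with hypothetical field names and options:

field_names = ['size', 'colour']
options = ['L', 'red']
data = dict(zip(field_names, options))  # {'size': 'L', 'colour': 'red'}
data['quantity'] = 2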
Example no. 59
0
    def test_multi_char_replacement(self):
        exp = [('', 'ACGT', range(4)),
               ('-.--.-foo-.--.--.--.-bar-.-', 'GAU', None),
               (' foo_-__-_ bar_-_', 'TAG', None), ('foo bar baz', 'A', [42])]
        obs = list(_format_fasta_like_records(self.gen, '-.-', '_-_', False))

        self.assertEqual(len(obs), len(exp))
        for o, e in zip(obs, exp):
            npt.assert_equal(o, e)
Example no. 60
0
def address_pairs(fields):
    """
    Zips address fields into pairs, appending the last field if the
    total is an odd number.
    """
    pairs = list(zip(fields[::2], fields[1::2]))
    if len(fields) % 2:
        pairs.append(fields[-1])
    return pairs
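
Usage sketch: with an odd number of fields the final element is appended
unpaired, so callers get a mixed list of tuples plus a trailing scalar:

>>> address_pairs(['a', 'b', 'c', 'd', 'e'])
[('a', 'b'), ('c', 'd'), 'e']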