コード例 #1
0
    def __init__(self, window: Window):
        """Set up the screen: background image plus a single '[ Back ]' button.

        The button is horizontally centred and placed two thirds of the way
        down the window. Its size and font are scaled by the window's
        ``hidpi_factor``, and clicking it calls ``self.back``.

        :param window: window whose size and ``hidpi_factor`` drive the layout.
        """
        super().__init__(window)

        from constants import BUTTON_SIZE

        size = window.get_size()
        center_x = size[0] / 2

        scale = window.hidpi_factor
        btn_w = BUTTON_SIZE[0] * scale
        btn_h = BUTTON_SIZE[1] * scale
        font = Font('sans-serif', 16, scale)

        self.bg_image = util.load_image('assets/background.png')

        # Top-left corner of the button.
        position = Vector(center_x - btn_w / 2, size[1] * 2 / 3)
        back_btn = Button(window, '[ Back ]', position, Vector(btn_w, btn_h),
                          Color(0, 102, 255), Color(255, 255, 255),
                          Color(0, 80, 230), Color(255, 255, 255),
                          font=font)
        back_btn.set_click_handler(self.back)
        self.children.append(back_btn)
コード例 #2
0
ファイル: button.py プロジェクト: spooky-squad/super-potato
    def __init__(self,
                 window: Window,
                 text: str,
                 pos: Vector,
                 size: Vector = Vector(150, 50),
                 bg: Color = Color(200, 200, 200),
                 fg: Color = Color(20, 20, 20),
                 bg_over: Color = Color(220, 220, 220),
                 fg_over: Color = Color(20, 20, 20),
                 border_size: int = 0,
                 font: Font = Font('sans-serif', 15, HIDPI_FACTOR)):
        """Create a button and precompute its centre and bounding box.

        :param window: window passed on to the parent constructor.
        :param text: label of the button.
        :param pos: position of the button; used as the first corner of the
            bounding box.
        :param size: extent of the button, added to ``pos`` for the second
            corner.
        :param bg: background colour.
        :param fg: foreground colour.
        :param bg_over: alternative background colour (presumably used while
            hovering -- confirm against the render code).
        :param fg_over: alternative foreground colour (same caveat).
        :param border_size: stored as ``self.border_size``.
        :param font: stored as ``self.font``.

        NOTE: the object-valued defaults are evaluated once, when the function
        is defined, so every button relying on them shares the same instances.
        """
        super().__init__(window)
        self._click_handler = None

        # Geometry and label.
        self.text, self.pos, self.size = text, pos, size

        # Colour scheme.
        self.bg, self.fg = bg, fg
        self.bg_over, self.fg_over = bg_over, fg_over

        # Extras.
        self.font, self.border_size = font, border_size

        # Centre point: position plus half the size on each axis.
        left, top = self.pos[0], self.pos[1]
        width, height = self.size[0], self.size[1]
        self.center = (left + width / 2, top + height / 2)

        # Box spanning pos .. pos + size.
        self.bounds = BoundingBox(pos, pos + size)
コード例 #3
0
    def __init__(self, window: Window):
        """Set up the screen: images, score state, fonts and two buttons.

        Loads the background and logo images and records their sizes and
        centres, then adds a '[ Start ]' button (one button-height above the
        window centre, wired to ``self.start``) and a '[ Help ]' button (one
        button-height below it, wired to ``self.help``).

        :param window: window whose size and ``hidpi_factor`` drive the layout.
        """
        super().__init__(window)

        from constants import BUTTON_SIZE

        self.window_size = window.get_size()
        self.window_center = (self.window_size[0] / 2, self.window_size[1] / 2)

        scale = window.hidpi_factor
        btn_w = BUTTON_SIZE[0] * scale
        btn_h = BUTTON_SIZE[1] * scale
        btn_font = Font('sans-serif', 16, scale)

        # Background image and its dimensions.
        self.bg_image = util.load_image('assets/background.png')
        self.bg_size = (self.bg_image.get_width(), self.bg_image.get_height())
        self.bg_center = (self.bg_size[0] / 2, self.bg_size[1] / 2)

        # Logo image and its dimensions.
        self.logo = util.load_image('assets/logo.png')
        self.logo_size = (self.logo.get_width(), self.logo.get_height())
        self.logo_center = (self.logo_size[0] / 2, self.logo_size[1] / 2)

        self.max_score = 0
        self.last_active_level = None

        self.text_font = Font('monospace', 20, window.hidpi_factor)
        self.text_font_color = Color(255, 255, 255)

        # (label, vertical offset from the window centre, click handler)
        layout = (
            ('[ Start ]', -btn_h, self.start),
            ('[ Help ]', btn_h, self.help),
        )
        for label, offset_y, handler in layout:
            button = Button(window,
                            label,
                            Vector(self.window_center[0] - btn_w / 2,
                                   self.window_center[1] + offset_y),
                            Vector(btn_w, btn_h),
                            Color(0, 102, 255),
                            Color(255, 255, 255),
                            Color(0, 80, 230),
                            Color(255, 255, 255),
                            font=btn_font)
            button.set_click_handler(handler)
            self.children.append(button)
コード例 #4
0
ファイル: clusters.py プロジェクト: valgaze/sumac
    def __init__(self, gb, seq_keys, gb_dir, num_cores, minlength, maxlength, length_thres=0.5, threshold=0.75, evalue=(1.0/10**10)):
        """
        Cluster sequences by running UCLUST through the ``usearch`` executable.

        Input: gb dictionary of SeqRecords, keys to all sequences, and an optional threshold for clustering.
        Output: the names of the cluster files written to ``uclusters/`` are appended to ``self.clusters``.

        gb_dir is accepted but not used in this method.
        num_cores is passed to usearch as ``-threads``.
        minlength / maxlength bound the sequence lengths kept by ``-sortbylength``.
        length_thres, threshold and evalue are passed as ``-minsl``, ``-id`` and ``-evalue``.

        Side effects: the description of every looked-up record is prefixed with its
        organism name; records whose organism contains "sp." are left out of the clustering.
        If usearch fails or cannot be started, ``self.error`` is set to True and the
        constructor returns early. ``self.error`` is not assigned on success
        (presumably initialised by ClusterBuilder -- confirm).
        """
        ClusterBuilder.__init__(self, seq_keys)
        self.seq_keys = seq_keys
        self.threshold = threshold
        color = Color()

        if not os.path.exists("uclusters"):
            os.makedirs("uclusters")

        # write sequences to fasta
        sequences = []
        for seq_key in seq_keys:
            record = gb[seq_key]
            organism = record.annotations["organism"]
            record.description = organism + " " + record.description
            if "sp." not in organism:
                sequences.append(record)
        # FASTA is a text format, so the handle must be opened in text mode;
        # the context manager closes it even if writing fails.
        with open("_sumac", "w") as fasta:
            SeqIO.write(sequences, fasta, 'fasta')
        # Replace spaces in header lines with underscores.
        with open("_sumac", "r") as raw, open("_sumac_filtered", "w") as filtered:
            for line in raw:
                if ">" in line:
                    line = line.replace(" ", "_")
                filtered.write(line)

        # call UCLUST
        sort_sequences = ["usearch", "-sortbylength", "_sumac_filtered", "-fastaout", "_sumac_sorted",
                          "-minseqlength", str(minlength), "-maxseqlength", str(maxlength)]
        uclust = ["usearch", "-cluster_fast", "_sumac_sorted", "-id", str(threshold),
                  "-minsl", str(length_thres), "-strand", "both", "-threads", str(num_cores),
                  "-clusters", "uclusters/", "-fulldp", "-evalue", str(evalue)]
        try:
            subprocess.check_call(sort_sequences)
            subprocess.check_call(uclust)
        except CalledProcessError as e:
            print(color.red + "UCLUST error: " + str(e) + color.done)
            print(color.red + "Trying SLINK instead..." + color.done)
            self.error = True
            return
        except OSError as e:
            print(color.red + "UCLUST is not installed correctly." + color.done)
            print(color.red + "OS error: " + str(e) + color.done)
            print(color.red + "Trying SLINK instead..." + color.done)
            self.error = True
            return
        finally:
            # Remove the temporary files directly instead of spawning `rm`,
            # which is not available on every platform.
            for temp_file in ("_sumac", "_sumac_filtered", "_sumac_sorted"):
                if os.path.exists(temp_file):
                    os.remove(temp_file)
        self.clusters.extend(
            name for name in listdir("uclusters/") if isfile(join("uclusters/", name)))
コード例 #5
0
    def calculate_PD_parallel(self, num_cores):
        """
        Method to calculate the fraction of triples, a measure of partial decisiveness (PD).
        See: Sanderson, M.J., McMahon, M.M. & Steel, M., 2010. BMC evolutionary biology, 10.

        Starts ``num_cores`` processes running ``calculate_PD_worker`` and waits
        for all of them. The workers receive a lock, their index, the number of
        workers, two shared integer counters (decisive and total triples), the
        expected number of triples and a shared copy of the OTUs.

        Returns the ratio decisive / total triples rounded to two decimals, or
        0.0 when no triple was counted.
        """
        color = Color()
        lock = multiprocessing.Lock()
        manager = multiprocessing.Manager()
        # Share the OTUs with the workers through a manager dict.
        # NOTE(review): assumes self.otus is a mapping -- confirm.
        otus_shared = manager.dict(self.otus)
        decisive_triples = manager.Value('i', 0)
        total_triples = manager.Value('i', 0)
        total = self.binomial_coefficient(len(self.otus), 3)

        # NOTE(review): calculate_PD_worker is looked up as a module-level
        # name, exactly as in the original code -- confirm it exists there.
        processes = []
        for i in range(num_cores):
            p = multiprocessing.Process(target=calculate_PD_worker,
                                        args=(lock, i, num_cores,
                                              decisive_triples, total_triples,
                                              total, otus_shared))
            p.start()
            processes.append(p)

        for p in processes:
            p.join()

        # Copy the results into a plain dict so they do not depend on the
        # manager process staying alive.
        self.otus = dict(otus_shared)

        sys.stdout.write("\r" + color.blue + "Calculating PD: " + color.red +
                         "100.00% " + color.blue + "finished\n" + color.done)
        sys.stdout.flush()

        # The counters are manager proxies; the numbers live in `.value`.
        counted = total_triples.value
        if counted == 0:
            return 0.0
        return round(decisive_triples.value / float(counted), 2)
コード例 #6
0
ファイル: distancematrix.py プロジェクト: valgaze/sumac
    def __init__(self, gb, seq_keys, length_threshold, gb_dir, num_cores):
        """
        Build the distance matrix with ``num_cores`` worker processes.

        seq_keys are the keys to all sequences; gb is accepted but not used
        directly in this method.
        length_threshold is the threshold of sequence length percent similarity to cluster taxa.
        For example if length_threshold = 0.25, and one sequence has
        length 100, the other sequence must have length 75 to 125. If the lengths are not similar
        enough the distance is set to 50 (which keeps them from being clustered).
        Generates a 2 dimensional list of distances. Distances are blastn e-values.

        The matrix starts as len(seq_keys) rows of len(seq_keys) entries, all 99,
        held in a manager list that is handed to ``self.distance_matrix_worker``
        and finally stored as ``self.distance_matrix``.
        """
        lock = multiprocessing.Lock()
        manager = multiprocessing.Manager()
        already_compared = manager.list()
        dist_matrix = manager.list()

        # Every cell starts at the placeholder distance 99.
        size = len(seq_keys)
        placeholder_row = [99] * size
        for _ in range(size):
            dist_matrix.append(placeholder_row)

        color = Color()
        print(color.blue + "Spawning " + color.red + str(num_cores) + color.blue + " processes to make distance matrix." + color.done)

        workers = []
        for worker_index in range(num_cores):
            worker_args = (seq_keys, length_threshold, dist_matrix,
                           already_compared, lock, worker_index, gb_dir)
            worker = multiprocessing.Process(target=self.distance_matrix_worker,
                                             args=worker_args)
            worker.start()
            workers.append(worker)

        for worker in workers:
            worker.join()

        sys.stdout.write("\n")
        sys.stdout.flush()
        self.distance_matrix = dist_matrix
コード例 #7
0
 def print_data(self):
     """
     Prints out details on the final aligned supermatrix.

     Reads the FASTA file ``self.file`` and prints, for every record, the
     fraction of '?' characters, followed by the number of records, the matrix
     length (taken from the last record read), the coverage density and the
     overall fraction of '?' characters.

     An empty file, or records with empty sequences, are reported with
     zeros instead of raising.
     """
     # TODO: make the output of this more useful
     color = Color()
     print(color.blue + "Supermatrix attributes:")
     num_records = 0
     total_missing = 0
     matrix_length = 0
     for record in SeqIO.parse(self.file, "fasta"):
         seq_length = len(record.seq)
         missing = sum(1 for letter in record.seq if letter == '?')
         total_missing += missing
         # NOTE: despite the "%" label this is a fraction between 0 and 1.
         fraction = missing / float(seq_length) if seq_length else 0.0
         print(color.yellow + "OTU: " + color.red + record.description +
               color.yellow + " % missing data = " + color.red +
               str(round(fraction, 2)))
         num_records += 1
         matrix_length = seq_length
     print(color.blue + "Total number of OTUs = " + color.red +
           str(num_records))
     print(color.blue + "Total length of matrix = " + color.red +
           str(matrix_length))
     print(color.blue + "Taxon coverage density = " + color.red +
           str(self.get_coverage_density()))
     total_cells = matrix_length * num_records
     total_fraction = total_missing / float(total_cells) if total_cells else 0.0
     print(color.blue + "Total % missing data = " + color.red +
           str(round(total_fraction, 2)) + color.done)
コード例 #8
0
    def __init__(self,
                 world: 'World',
                 pos: Tuple[int, int],
                 size: Tuple[int, int]):
        """Create the item at ``pos`` with extent ``size`` in a fixed grey colour.

        :param world: world passed on to the parent constructor.
        :param pos: stored as ``self.pos``.
        :param size: stored as ``self.size``.
        """
        super().__init__(world)

        self.pos, self.size = pos, size
        # Fixed dark grey.
        self.color = Color(80, 80, 80)
コード例 #9
0
 def print_PD(self):
     """
     Prints the value returned by ``self.get_PD()`` (partial decisiveness)
     as one coloured line.
     """
     color = Color()
     label = color.blue + "Partial decisiveness (fraction of triples) = "
     value = color.red + str(self.get_PD())
     print(label + value + color.done)
コード例 #10
0
    def print_data(self):
        """
        Prints, for every alignment file in ``self.files``: its running number,
        the DNA region name, the number of OTUs, the aligned length, the
        missing data (%) and the taxon coverage density.

        For user-provided alignments the file name is used as the region name;
        otherwise the name is taken from the description of the first record
        (everything after its fifth space-separated word).
        """
        all_taxa = self.get_all_taxa()
        color = Color()

        def emit(label_color, label, value):
            # One report line: coloured label, red value, colour reset.
            print(label_color + label + color.red + value + color.done)

        for number, alignment in enumerate(self.files, 1):
            records = list(SeqIO.parse(alignment, "fasta"))
            if not self.user_provided:
                words = records[0].description.split(" ")
                region_name = " ".join(words[5:])
            else:
                region_name = alignment

            emit(color.blue, "Aligned cluster #: ", str(number))
            emit(color.yellow, "DNA region: ", region_name)
            emit(color.yellow, "OTUs: ", str(len(records)))
            emit(color.yellow, "Aligned length: ", str(len(records[0].seq)))
            emit(color.yellow, "Missing data (%): ",
                 str(round(100 - (100 * len(records) / float(len(all_taxa))),
                           1)))
            emit(color.yellow, "Taxon coverage density: ",
                 str(round(len(records) / float(len(all_taxa)), 2)))
コード例 #11
0
 def align_cluster(self, cluster_file):
     """
     Worker function for align_clusters.
     Inputs a FASTA file containing an unaligned sequence cluster.
     Uses MAFFT (with --auto and --adjustdirection) to align the cluster and
     writes the result below the "alignments" directory.

     Returns the path of the alignment file. The path is returned even when
     the alignment failed, in which case the file may not exist; the failure
     is reported on stdout together with its cause.
     """
     mafft_cline = MafftCommandline(input=cluster_file)
     mafft_cline.set_parameter("--auto", True)
     mafft_cline.set_parameter("--adjustdirection", True)
     color = Color()
     print(color.red + str(mafft_cline) + color.done)
     sys.stdout.flush()
     # Swap everything before the first "/" for "alignments"; a bare file
     # name is simply placed inside "alignments/".
     if "/" in cluster_file:
         alignment_file = "alignments" + cluster_file[cluster_file.index("/"):]
     else:
         alignment_file = "alignments/" + cluster_file
     try:
         stdout, stderr = mafft_cline()
         with open(alignment_file, "w") as handle:
             handle.write(stdout)
     except Exception as error:
         # Catch Exception rather than everything, so that KeyboardInterrupt
         # and SystemExit still stop the worker.
         print(
             color.red +
             "Error: alignment file not generated. Please check your MAFFT installation."
             + color.done)
         print(color.red + str(error) + color.done)
     return alignment_file
コード例 #12
0
 def print_data(self):
     """
     Prints name, sequence, accessions and sequence lengths of this object.

     Name and sequence are printed on the same line as their label; the
     accessions and the sequence lengths are printed on the line after it.
     """
     color = Color()
     blue, red = color.blue, color.red
     print(blue + "Name = " + red + self.name)
     print(blue + "Sequence = " + red + self.sequence)
     for label, values in (("Accessions = ", self.accessions),
                           ("Sequence_lengths = ", self.sequence_lengths)):
         print(blue + label + red)
         print(values)
コード例 #13
0
    def state(self):
        """ Exec state of this component, derived from the stroke colour of its 'rect'.

        Returns 'INVALID' for a red, 'VALID' for a green and 'RUNNING' for a
        blue stroke; any other colour gives 'UNKNOWN'.
        """
        rect = self.root.find_element_by_css_selector('rect')
        stroke = Color.from_string(Style(rect.get_attribute('style')).stroke)

        # Checked in this order; the first matching colour wins.
        known_strokes = (
            ((255, 0, 0), 'INVALID'),  # red
            ((0, 255, 0), 'VALID'),    # green
            ((0, 0, 255), 'RUNNING'),  # blue
        )
        for rgb, label in known_strokes:
            if stroke == Color(*rgb):
                return label

        return 'UNKNOWN'
コード例 #14
0
    def __init__(self, window: Window, source: WindowHandler) -> None:
        """Create the world: its levels, the player and the text style.

        :param window: window passed to the parent constructor and kept as
            ``self.window``; its ``hidpi_factor`` scales the text font.
        :param source: handler stored as ``self.source`` (presumably the
            handler that opened this one -- confirm against the callers).
        """
        super().__init__(window)
        self.source = source
        # The first level returned by _init_levels() becomes the current one.
        self.levels = self._init_levels()
        self.level = self.levels[0]
        # The player receives this world; it is created before the
        # assignment to self.window below.
        self.player = Player(self)
        self.window = window

        # White 16pt monospace text, scaled for high-DPI displays.
        self.text_font = Font('monospace', 16, window.hidpi_factor)
        self.text_font_color = Color(255, 255, 255)
コード例 #15
0
ファイル: genbank.py プロジェクト: aaw866/sumac
 def print_search_status(self, i, total):
     """
     Rewrites the current terminal line with the search progress: the number
     of ingroup and outgroup keys found so far and the percentage
     ``100 * i / total``, rounded to one decimal.
     """
     color = Color()
     ingroup = (color.yellow + 'Ingroup sequences found: ' + color.red +
                str(len(self.ingroup_keys)))
     outgroup = (color.yellow + '  Outgroup sequences found: ' + color.red +
                 str(len(self.outgroup_keys)))
     searched = (color.yellow + '  Percent searched: ' + color.red +
                 str(round(100 * float(i) / total, 1)))
     # The leading "\r" returns to the start of the line, so each call
     # overwrites the previous status.
     sys.stdout.write('\r' + ingroup + outgroup + searched + color.done)
     sys.stdout.flush()
コード例 #16
0
ファイル: squib.py プロジェクト: pennomi/brimstone-rpg
    def draw_text(
        self,
        id: str = None,
        x: float = 0,
        y: float = 0,
        w: float = -1.0,  # By default, DON'T restrict w/h
        h: float = -1.0,
        text: str = "",
        color: Color = BLACK,
        font_name: str = "Ubuntu",
        font_size: int = 16,
        align: str = "left",
        line_spacing: int = 0,
        justify: bool = False,
        debug: bool = False,
    ):
        """Draw a block of text at (x, y) through ``self.renderer``.

        ``w`` and ``h`` are passed to the text layout as width and height.
        Literal backslash-n sequences in ``text`` are turned into real line
        breaks. ``align`` must be "left", "center" or "right"; any other value
        raises ``KeyError``. With ``debug`` set, a stroked, unfilled rectangle
        of size w x h is drawn at (x, y) first. ``id`` is not used here.
        """
        if debug:
            # Outline of the text area.
            outline = Color(0.0, 1.0, 1.0, 1.0)
            self.draw_rect(x=x, y=y, w=w, h=h, color=outline,
                           stroke=True, fill=False)

        # Normalise the inputs.
        text = text.replace("\\n", "\n")
        alignments = {
            "left": TextAlignment.Left,
            "center": TextAlignment.Center,
            "right": TextAlignment.Right,
        }
        alignment = alignments[align]  # TODO: Validate on the parser

        # Configure the renderer: font, layout, colour, content.
        renderer = self.renderer
        renderer.set_font(font_name, font_size)
        renderer.configure_text_layout(width=w,
                                       height=h,
                                       line_spacing=line_spacing,
                                       alignment=alignment,
                                       justify=justify)
        renderer.set_color(*color)
        renderer.set_text(text)

        # Paint at the requested position.
        with renderer.translate(x, y):
            renderer.paint_text()
コード例 #17
0
 def search(self, gb, max_ingroup):
     """
     Perform search of all GB SeqRecords for ingroup/outgroup,
     and save results of search to file.

     ``self.ingroup`` and ``self.outgroup`` are '+'-separated lists of terms.
     A record belongs to the ingroup when one of the ingroup terms equals one
     of its taxonomy or organism annotations; otherwise the outgroup terms are
     tried. Matching keys are appended to ``self.ingroup_keys`` or
     ``self.outgroup_keys``.

     The search stops early once the number of ingroup keys equals
     ``max_ingroup`` (no limit when it is None). Records that cannot be
     processed are reported and skipped. ``self.write_file()`` is always
     called at the end.
     """
     keys = gb.keys()
     total = len(keys)
     ingroup_terms = self.ingroup.split('+')
     outgroup_terms = self.outgroup.split('+')
     for i, key in enumerate(keys):
         try:
             # Look the record up once; both annotations come from it.
             annotations = gb[key].annotations
             tax_and_org = set()
             for field in ('taxonomy', 'organism'):
                 value = annotations[field]
                 # An annotation is either one string or a collection.
                 if isinstance(value, str):
                     tax_and_org.add(value)
                 else:
                     tax_and_org.update(value)
             if any(term in tax_and_org for term in ingroup_terms):
                 self.ingroup_keys.append(key)
             elif any(term in tax_and_org for term in outgroup_terms):
                 self.outgroup_keys.append(key)
         except Exception:
             # Deliberately best-effort: a record that cannot be parsed is
             # reported and skipped. KeyboardInterrupt and SystemExit are no
             # longer swallowed.
             sys.stdout.write('\n')
             sys.stdout.flush()
             color = Color()
             print(color.red + 'Caught Biopython GenBank parsing error! NCBI accession: ' + color.yellow + str(key) + color.done)
         self.print_search_status(i, total)
         if max_ingroup is not None and len(self.ingroup_keys) == max_ingroup:
             break
     sys.stdout.write('\n')
     sys.stdout.flush()
     self.write_file()
コード例 #18
0
def get_boid_color(chromatid_a, chromatid_b):
    """Return the colour obtained by averaging the colour genes of two chromatids.

    The first three values returned by ``get_color_genes`` for each chromatid
    are averaged pairwise, truncated to an int and clamped to 0..255.
    """
    genes_a = get_color_genes(chromatid_a)
    genes_b = get_color_genes(chromatid_b)

    def blend(first, second):
        # Mean of the two values, kept inside the 0..255 channel range.
        return max(0, min(255, int((first + second) / 2)))

    red, green, blue = (blend(genes_a[i], genes_b[i]) for i in range(3))

    return Color(red, green, blue)
コード例 #19
0
ファイル: genbank.py プロジェクト: aaw866/sumac
 def read_file(self):
     """
     Loads results of GB search from file.

     Reads the pickled dictionary stored in "gb_search_results", restores
     ``ingroup``, ``outgroup``, ``ingroup_keys`` and ``outgroup_keys`` from
     it and prints a short summary.
     """
     # NOTE(review): unpickling can execute arbitrary code; this is only safe
     # while "gb_search_results" is a file this program wrote itself.
     with open("gb_search_results", "rb") as results_file:
         groups = pickle.load(results_file)
     self.ingroup = groups["ingroup"]
     self.outgroup = groups["outgroup"]
     self.ingroup_keys = groups["ingroup_keys"]
     self.outgroup_keys = groups["outgroup_keys"]
     color = Color()
     print(color.yellow + 'This search was already performed. Loading previous results...' + color.done)
     print(color.yellow + 'Ingroup sequences found: '
           + color.red + str(len(self.ingroup_keys)) + color.yellow
           + '  Outgroup sequences found: ' + color.red
           + str(len(self.outgroup_keys)) + color.done)
コード例 #20
0
ファイル: parser.py プロジェクト: pennomi/brimstone-rpg
def _parse_expression(key: str, exp: str):
    """Take in an expression and safely transform it into a python expression.
    """
    # Do some cleaning
    exp = exp.strip()

    # TODO: Use regex to validate first?
    # TODO: Maybe PyParsing could validate it
    # TODO: Support expressions
    if key in [
            "x", "y", "w", "h", "radius", "padding_x", "padding_y",
            "line_spacing"
    ]:
        return float(exp)
    if key == "color":
        return Color(*tuple(float(e) for e in exp.split(',')))
    return exp
コード例 #21
0
    def render(self, canvas: simplegui.Canvas):
        """Draw the player, then advance its movement by one step.

        After drawing, this method moves the player by its velocity, advances
        the roll counter, limits the horizontal speed, runs the collision
        callbacks of the level items, applies the acceleration and finally
        triggers ``on_death`` when the player leaves the allowed area.

        :param canvas: canvas to draw on.
        """
        bounds = self.get_bounds()
        dpi_factor = self.window.hidpi_factor

        # Draw player.
        if PLAYER_POTATO:
            # Sprite mode: draw one sprite cell, chosen from the roll
            # counter, centred on the player and scaled for the display.
            dest_center = self.pos + self.size / 2
            index = (self.roll // self.sprite_cols) % self.sprite_cols
            self.sprite.draw(canvas, dest_center * dpi_factor,
                             self.size * dpi_factor, (index, 0))
        else:
            # Fallback: a single-colour polygon through the bounds points.
            point_list = [p.multiply(dpi_factor).into_tuple() for p in bounds]
            color = Color(120, 120, 200)

            canvas.draw_polygon(point_list, 1, str(color), str(color))

        # Update position.
        self.last_pos = self.pos.copy()
        self.pos.add(self.vel)

        # The roll advances by 1 per call plus a share proportional to the
        # horizontal velocity.
        self.roll += self.vel.x * 2 / PLAYER_VELOCITY[0]
        self.roll += 1

        # Limit the horizontal speed to PLAYER_VELOCITY[0], keeping its sign.
        if abs(self.vel.x) > PLAYER_VELOCITY[0]:
            self.vel.x = math.copysign(PLAYER_VELOCITY[0], self.vel.x)

        # Check collisions position.
        # on_ground is cleared first; presumably an item's on_collide sets it
        # again when the player stands on that item -- confirm.
        self.on_ground = False

        bounds = self.get_bounds()
        if not self.is_dying:
            for item in self.world.level.items:
                if item.collides_with(bounds):
                    item.on_collide(self)

        self.vel.add(self.accel)

        # Do gravity and platform collision.
        if self.on_ground:
            self.vel.y = 0
            self.accel.y = 0
        else:
            self.accel.y = -ACCEL_GRAVITY

        # The player dies when its bounds reach WINDOW_SIZE[1] vertically or
        # x <= 0 horizontally.
        if bounds.max.y >= WINDOW_SIZE[1] or bounds.min.x <= 0:
            self.on_death()
コード例 #22
0
ファイル: genbank.py プロジェクト: aaw866/sumac
 def download(cls, divisions, path):
     """
     Downloads and uncompresses files for a GenBank division.
     Path should be the absolute path to save the GB files.

     For every division, the files gb<division>1.seq.gz, gb<division>2.seq.gz,
     ... are fetched from the "genbank" directory of ftp.ncbi.nlm.nih.gov for
     as long as the next number exists in the directory listing. Each archive
     is uncompressed next to itself and then deleted.

     Exits the program (status 0, as before) when no file exists for a division.
     """
     for division_input in divisions:
         color = Color()
         division = str(division_input).lower()
         print(color.purple + "Connecting to ftp.ncbi.nlm.nih.gov..." + color.done)
         ftp = FTP("ftp.ncbi.nlm.nih.gov")
         try:
             ftp.login()
             print(color.yellow + "Opening directory genbank..." + color.done)
             ftp.cwd("genbank")
             # A set makes the repeated membership test below cheap.
             file_list = set(ftp.nlst())
             if not os.path.exists(path):
                 os.makedirs(path)
             i = 1
             file_name = "gb" + division + str(i) + ".seq.gz"
             while file_name in file_list:
                 # Join per file rather than appending "/" to `path`, which
                 # used to grow by one slash for every division.
                 archive = os.path.join(path, file_name)
                 print(color.red + "Downloading file " + file_name + color.done)
                 with open(archive, "wb") as handle:
                     cls.getbinary(ftp, file_name, handle)
                 print(color.yellow + "Uncompressing file " + file_name + color.done)
                 # archive[:-3] drops the ".gz" suffix. Copy in chunks so the
                 # whole file is never held in memory.
                 with gzip.open(archive, "rb") as source, \
                         open(archive[:-3], "wb") as target:
                     for chunk in iter(lambda: source.read(1 << 20), b""):
                         target.write(chunk)
                 os.remove(archive)
                 i += 1
                 file_name = 'gb' + division + str(i) + '.seq.gz'
             # check if any files were downloaded
             if i == 1:
                 print(color.red + "GenBank division " + str(division_input)
                       + " not found. Please use a valid division name "
                       + "(e.g. VRT, INV, PLN)." + color.done)
                 sys.exit(0)
         finally:
             # Always release the connection, including on sys.exit().
             try:
                 ftp.quit()
             except Exception:
                 ftp.close()
コード例 #23
0
ファイル: genbank.py プロジェクト: aaw866/sumac
 def sqlite(path):
     """
     Opens the SeqIO index for the GenBank files in ``path``.
     Path is the absolute path of the GB files.

     When ``path``/gb.idx exists it is opened directly; otherwise the index is
     built from every file found in ``path``. Exits the program when the
     directory is empty.

     Returns the object produced by ``SeqIO.index_db`` (a dictionary-like
     collection of SeqRecord objects).
     """
     color = Color()
     index_file = path + "/gb.idx"

     # Index already present: just open it.
     if os.path.exists(index_file):
         print(color.purple + "Genbank database already downloaded. Indexing sequences..." + color.done)
         return SeqIO.index_db(index_file)

     files = os.listdir(path)
     if not files:
         print(color.red + "GenBank files not found. Re-download with the -d option. See --help for more details." + color.done)
         sys.exit(0)

     path_files = [path + "/" + name for name in files]
     print(color.purple + "Genbank database already downloaded. Indexing sequences..." + color.done)
     return SeqIO.index_db(index_file, path_files, "genbank")
コード例 #24
0
 def __init__(self, cluster_files, aligned, num_cores):
     """
     Input parameters:
     cluster_files: a list of FASTA files
     aligned: a string that indicates whether each file contains an
         unaligned sequence cluster ("unaligned"), a user-provided
         alignment ("aligned") or, for any other value, an alignment
         made by SUMAC.
     num_cores: number of worker processes used to align the clusters.

     For unaligned clusters, creates new processes to align each sequence
     cluster and stores the resulting list of aligned FASTA files in
     ``self.files``; otherwise ``cluster_files`` is stored unchanged.
     Also sets the flags ``self.user_provided`` and ``self.sumac_aligned``.

     Exits the program when the first alignment file is missing or empty.
     An empty ``cluster_files`` list results in an empty ``self.files``.
     """
     if not os.path.exists("alignments"):
         os.makedirs("alignments")
     color = Color()
     if aligned == "unaligned":
         self.user_provided = False
         self.sumac_aligned = False
         print(color.blue + "Spawning " + color.red + str(num_cores) +
               color.blue + " processes to align clusters." + color.done)
         pool = multiprocessing.Pool(num_cores)
         try:
             alignment_files = pool.map(self.align_cluster, cluster_files)
         finally:
             # Shut the pool down even when a worker raised.
             pool.close()
             pool.join()
         self.files = alignment_files
         # Only the first result is inspected, as a cheap check that MAFFT
         # ran at all; with no clusters there is nothing to inspect.
         if alignment_files:
             first = alignment_files[0]
             if (not os.path.isfile(first)) or os.path.getsize(first) == 0:
                 print(color.red + "Error: MAFFT is not installed correctly." +
                       color.done)
                 sys.exit()
     elif aligned == "aligned":
         print(color.purple + "Loading user-provided alignments..." +
               color.done)
         self.user_provided = True
         self.sumac_aligned = False
         self.files = cluster_files
     else:
         print(color.purple + "Loading SUMAC alignments..." + color.done)
         self.user_provided = False
         self.sumac_aligned = True
         self.files = cluster_files
コード例 #25
0
 def calculate_PD(self):
     """
     Method to calculate the fraction of triples, a measure of partial decisiveness (PD).
     See: Sanderson, M.J., McMahon, M.M. & Steel, M., 2010. BMC evolutionary biology, 10.

     Visits every combination of three OTUs from ``self.otus`` once, asks
     ``calculate_triplet_PD`` whether the triplet is decisive and updates the
     OTU and locus decisiveness scores. Progress is written to stdout.

     Returns the number of decisive triplets divided by the number of
     triplets, rounded to two decimals, or 0.0 when there are fewer than
     three OTUs.
     """
     # Imported here so this method does not depend on the module's imports.
     import itertools

     color = Color()
     decisive_triples = 0
     total_triples = 0
     total = self.binomial_coefficient(len(self.otus), 3)
     # combinations() yields the triplets in the same order as three nested
     # loops restricted to i < j < k.
     for otu1, otu2, otu3 in itertools.combinations(self.otus, 3):
         sys.stdout.write("\r" + color.blue + "Calculating PD: " + color.red +
                          str(round(100 * total_triples / float(total), 4)) +
                          "% " + color.blue + "finished" + color.done)
         sys.stdout.flush()
         # do PD calculations for this triplet
         triplet = [otu1, otu2, otu3]
         decisive, decisive_loci = self.calculate_triplet_PD(triplet)
         decisive_triples += decisive
         total_triples += 1
         # triplet calculations for OTU and loci decisiveness scores
         self.update_OTU_decisiveness(triplet, decisive)
         self.update_locus_decisiveness(decisive_loci, decisive)
     sys.stdout.write("\r" + color.blue + "Calculating PD: " + color.red +
                      "100.00% " + color.blue + "finished\n" + color.done)
     sys.stdout.flush()
     if total_triples == 0:
         # Fewer than three OTUs: there is no triplet to evaluate.
         return 0.0
     return round(decisive_triples / float(total_triples), 2)
コード例 #26
0
 def get_border_color(self) -> Color:
     """
     Returns the border color of the rectangle: a fixed light grey.
     """
     light_grey = Color(200, 200, 200)
     return light_grey
コード例 #27
0
 def get_fill_color(self) -> Color:
     """
     Returns the fill color of the rectangle: always black.
     """
     black = Color(0, 0, 0)
     return black
コード例 #28
0
def main():
    """
    Command-line entry point: build a supermatrix from user-supplied
    alignments or from sequences mined out of a local GenBank database.

    Workflow, driven by the parsed command line options:

    1. With --alignments or --salignments the given files are loaded directly.
    2. Otherwise GenBank is downloaded/indexed as needed, the ingroup and
       outgroup sequences are searched for, the sequences are clustered
       (guide sequences, UCLUST, HAC or SLINK), one FASTA file is assembled
       per cluster and the cluster files are handed to Alignments.
    3. The alignments are concatenated into a supermatrix and the reports are
       written; figures are only produced when matplotlib and numpy can be
       located, and partial decisiveness only with --decisiveness.

    The function replaces sys.stdout with a Logger instance for the rest of
    the run.

    Exit status: 1 when the run cannot continue (GenBank database missing, no
    ingroup given, no sequences found, no clusters found or left), 0 when
    --search mode stops after clustering.
    """
    # parse the command line arguments
    parser = argparse.ArgumentParser()
    parser.add_argument("--download_gb", "-d", help="Name of the GenBank division to download (e.g. PLN or MAM).")
    parser.add_argument("--download_gb2", "-d2", help="""Name of the optional second GenBank division to download. Use this
                                                         option if the ingroup and outgroup are in different GenBank divisions.""")
    parser.add_argument("--path", "-p", help="Absolute path to download GenBank files to. Defaults to ./genbank/")
    parser.add_argument("--ingroup", "-i", help="Ingroup clade to build supermatrix.")
    parser.add_argument("--outgroup", "-o", help="Outgroup clade to build supermatrix.")
    parser.add_argument("--cores", "-c", help="The number of CPU cores to use for parallel processing. Defaults to the max available.")
    parser.add_argument("--id", "-id", help="UCLUST id threshold to cluster taxa. Defaults to 0.50")
    parser.add_argument("--evalue", "-e", help="BLAST E-value threshold to cluster taxa. Defaults to 1e-10")
    parser.add_argument("--length", "-l", help="Threshold of sequence length percent similarity to cluster taxa. Defaults to 0.25")
    parser.add_argument("--maxlength", "-maxl", help="Maximum length of sequences to include in UCLUST clusters. Defaults to 5000")
    parser.add_argument("--minlength", "-minl", help="Minimum length of sequences to include in UCLUST clusters. Defaults to 100")
    parser.add_argument("--min_clusters", "-minc", help="Minimum number of taxa needed for clusters. Defaults to 4")
    parser.add_argument("--max_ingroup", "-m", help="Maximum number of taxa to include in ingroup. Default is none (no maximum limit).")
    parser.add_argument("--guide", "-g", help="""FASTA file containing sequences to guide cluster construction. If this option is
                                                 selected then all-by-all BLAST comparisons are not performed.""")
    parser.add_argument("--alignments", "-a", nargs='+', help="List of aligned FASTA files to build supermatrix instead of mining GenBank.")
    parser.add_argument("--salignments", "-sa", nargs='+', help="List of SUMAC alignments to build supermatrix instead of mining GenBank.")
    parser.add_argument("--search", "-s", action='store_true', help="Turn on search and cluster mode. Will not make alignments or supermatrix.")
    parser.add_argument("--decisiveness", "-de", action='store_true', help="Calculate partial decisiveness. For larger matrices this may be slow.")
    parser.add_argument("--hac", action='store_true', help="Use HAC single-linkage clustering algorithm instead of the default UCLUST algorithm.")
    parser.add_argument("--slink", action='store_true', help="Use the SLINK clustering algorithm instead of the default UCLUST algorithm.")
    args = parser.parse_args()

    # from here on everything printed goes through the Logger
    sys.stdout = Logger()
    color = Color()

    print("")
    print(color.blue + "SUMAC: supermatrix constructor v2.22" + color.done)
    print("")

    # use every available core unless the user asks for fewer; a request for
    # more cores than the machine has is ignored
    num_cores = multiprocessing.cpu_count()
    if args.cores and int(args.cores) <= num_cores:
        num_cores = int(args.cores)

    if args.alignments:
        # if the user provides alignments:
        alignment_files = args.alignments
        alignments = Alignments(alignment_files, "aligned", num_cores)
    elif args.salignments:
        # if the user inputs SUMAC alignments from previous run
        alignment_files = args.salignments
        alignments = Alignments(alignment_files, "sumac_aligned", num_cores)
    else:
        if args.search:
            print(color.yellow + "Running in search and cluster mode. Clusters will not be aligned and supermatrix will not assembled." + color.done)

        # first download and set up sqllite db if necessary
        if args.path:
            gb_dir = args.path
        else:
            gb_dir = os.path.abspath("genbank/")
        # if the user requests downloading
        if args.download_gb:
            divisions = [args.download_gb]
            if args.download_gb2:
                divisions.append(args.download_gb2)
            GenBankSetup.download(divisions, gb_dir)
            print(color.yellow + "Setting up SQLite database..." + color.done)
            gb = GenBankSetup.sqlite(gb_dir)
        # the user didn't request downloading, so check for genbank directory
        elif not os.path.exists(gb_dir):
            print(color.red + "GenBank database not downloaded. Re-run with the -d option. See --help for more details." + color.done)
            # failure: report a non-zero status so calling scripts can detect it
            sys.exit(1)
        # the genbank directory exists so check for sequences and index them
        else:
            gb = GenBankSetup.sqlite(gb_dir)
        print(color.purple + "%i sequences indexed!" % len(gb) + color.done)

        # check for ingroup and outgroup
        if not args.ingroup:
            print(color.red + "Please specify ingroup. See --help for details." + color.done)
            sys.exit(1)
        ingroup = args.ingroup
        # "NONE" is the placeholder handed to GenBankSearch when no outgroup is given
        outgroup = args.outgroup if args.outgroup else "NONE"

        # search db for ingroup and outgroup sequences
        print(color.blue + "Ingroup = " + ingroup + color.done)
        if args.outgroup:
            print(color.blue + "Outgroup = " + outgroup + color.done)
        print(color.blue + "Searching for ingroup and outgroup sequences..." + color.done)
        if args.max_ingroup:
            search_results = GenBankSearch(gb, ingroup, outgroup, int(args.max_ingroup))
        else:
            search_results = GenBankSearch(gb, ingroup, outgroup)
        ingroup_keys = search_results.ingroup_keys
        outgroup_keys = search_results.outgroup_keys
        all_seq_keys = ingroup_keys + outgroup_keys
        if len(all_seq_keys) == 0:
            print(color.red + "No sequences found for the ingroup and outgroup!" + color.done)
            sys.exit(1)

        # determine sequence length similarity threshold
        length_threshold = 0.25
        if args.length:
            length_threshold = float(args.length)
        print(color.blue + "Using sequence length similarity threshold " + color.red + str(length_threshold) + color.done)

        # determine UCLUST id threshold
        id_threshold = 0.5
        if args.id:
            id_threshold = float(args.id)
        print(color.blue + "Using UCLUST id threshold " + color.red + str(id_threshold) + color.done)

        # determine e-value threshold
        evalue_threshold = (1.0/10**10)
        if args.evalue:
            evalue_threshold = float(args.evalue)
        print(color.blue + "Using BLAST e-value threshold " + color.red + str(evalue_threshold) + color.done)

        # set when UCLUST clustering reports an error; bound up front so the
        # checks further down never depend on which branch ran
        uclust_error = False

        # now build clusters, first checking whether we are using FASTA file of guide sequences
        # or doing all-by-all comparisons
        if args.guide:
            # use FASTA file of guide sequences
            print(color.blue + "Building clusters using the guide sequences..." + color.done)
            cluster_builder = GuidedClusterBuilder(args.guide, all_seq_keys, length_threshold, evalue_threshold, gb_dir, num_cores)
        else:
            # cluster using UCLUST
            if not (args.slink or args.hac):
                print(color.blue + "Clustering sequences with UCLUST..." + color.done)
                maxlength = 5000
                minlength = 100
                if args.maxlength:
                    maxlength = int(args.maxlength)
                if args.minlength:
                    minlength = int(args.minlength)
                cluster_builder = UCLUSTClusterBuilder(gb, all_seq_keys, gb_dir, num_cores, minlength, maxlength, length_threshold, id_threshold, evalue_threshold)
                if (cluster_builder.error == True):
                    uclust_error = True
                else:
                    print(color.purple + "Clustering completed..." + color.done)
            # distance-matrix clustering is used on request, and also as the
            # fallback when UCLUST reported an error (SLINK unless --hac)
            if (args.slink or args.hac) or uclust_error:
                # make distance matrix
                print(color.blue + "Making distance matrix for all sequences..." + color.done)
                distance_matrix = DistanceMatrixBuilder(gb, all_seq_keys, length_threshold, gb_dir, num_cores).distance_matrix

                # cluster sequences
                if args.hac:
                    print(color.purple + "Clustering sequences using the HAC algorithm..." + color.done)
                    cluster_builder = HACClusterBuilder(all_seq_keys, distance_matrix, evalue_threshold)
                else:
                    print(color.purple + "Clustering sequences using the SLINK algorithm..." + color.done)
                    cluster_builder = SLINKClusterBuilder(all_seq_keys, distance_matrix, evalue_threshold)

        print(color.purple + "Found " + color.red + str(len(cluster_builder.clusters)) + color.purple + " clusters." + color.done)
        if len(cluster_builder.clusters) == 0:
            print(color.red + "No clusters found." + color.done)
            sys.exit(1)

        # filter clusters, make FASTA files
        print(color.yellow + "Building sequence matrices for each cluster." + color.done)
        min_clusters = 4
        if args.min_clusters:
            min_clusters = int(args.min_clusters)
        if (args.slink or args.hac or args.guide) or uclust_error:
            cluster_builder.assemble_fasta(gb, min_clusters)
        else:
            cluster_builder.assemble_fasta_uclust(min_clusters)
        print(color.purple + "Kept " + color.red + str(len(cluster_builder.clusters)) + color.purple + " clusters, discarded those with < " + str(min_clusters) + " taxa." + color.done)

        # if we are in search and cluster mode we are done (successful exit)
        if args.search:
            sys.exit(0)

        if len(cluster_builder.clusters) == 0:
            print(color.red + "No clusters left to align." + color.done)
            sys.exit(1)
        # now align each cluster with MAFFT
        print(color.blue + "Aligning clusters with MAFFT..." + color.done)
        alignments = Alignments(cluster_builder.cluster_files, "unaligned", num_cores)

    alignments.print_data()
    alignments.make_gene_region_csv()

    # concatenate alignments
    print(color.purple + "Concatenating alignments..." + color.done)
    supermatrix = Supermatrix(alignments)

    # figures need both matplotlib and numpy; only check that they can be found
    # NOTE(review): the imp module is deprecated and was removed in Python 3.12;
    # importlib.util.find_spec is the replacement if Python 2 support is not needed.
    try:
        imp.find_module('matplotlib')
        imp.find_module('numpy')
        matplot = True
    except ImportError:
        matplot = False
        print(color.red + "Skipping generating graphs since matplotlib is not installed." + color.done)

    if not args.alignments: # and not args.salignments:
        # only make genbank_csv if the sequences were mined direct from genbank
        supermatrix.make_genbank_csv()
    supermatrix.print_data()
    if matplot:
        supermatrix.make_sequence_data_figure()
    if args.decisiveness:
        supermatrix.print_PD()
        if matplot:
            supermatrix.make_sequence_decisiveness_figure()
        supermatrix.make_decisiveness_csv()
    print(color.yellow + "Final supermatrix: " + color.red + "alignments/supermatrix_concatenated.fasta" + color.done)
コード例 #29
0
ファイル: distancematrix.py プロジェクト: valgaze/sumac
    def distance_matrix_worker(self, seq_keys, length_threshold, dist_matrix, already_compared, lock, process_num, gb_dir):
        """
        Worker process for make_distance_matrix(). Takes a list "already_compared" of sequences that have
        already had all pairwise comparisons. Each worker process will work making pairwise comparisons
        for a different sequence, adding them to the "already_compared" list as they are completed.

        For every row i that this worker claims, the sequence is BLASTed against all sequences that
        come after it in seq_keys and the results are written symmetrically into dist_matrix:
        the HSP e-value when the length similarity threshold is met, 50.0 otherwise.

        seq_keys          -- ordered keys into the GenBank index; position == row/column in dist_matrix
        length_threshold  -- allowed relative difference in sequence length (converted with float())
        dist_matrix       -- matrix updated one whole row at a time
                             (presumably a multiprocessing manager proxy -- TODO confirm against caller)
        already_compared  -- list of keys already claimed by some worker, shared between workers
        lock              -- guards the check-and-append on already_compared
        process_num       -- used to give this worker its own temporary file names
        gb_dir            -- directory containing gb.idx
        """
        def set_distance(row_index, col_index, value):
            # read the row, change one cell, store the whole row back
            row = dist_matrix[row_index]
            row[col_index] = value
            dist_matrix[row_index] = row

        # each process must load its own sqlite gb
        gb = SeqIO.index_db(gb_dir + "/gb.idx")
        process_num = str(process_num)
        query_file = 'query' + process_num + '.fasta'
        blast_db_file = 'blast_db' + process_num + '.fasta'
        blast_xml_file = 'blast' + process_num + '.xml'
        threshold = float(length_threshold)
        color = Color()
        for i, key in enumerate(seq_keys):
            # check whether another process is already comparing this row
            with lock:
                compare_row = key not in already_compared
                if compare_row:
                    already_compared.append(key)
            if compare_row:
                # make the blast query
                record1 = gb[key]
                with open(query_file, 'w') as output_handle:
                    SeqIO.write(record1, output_handle, 'fasta')

                # distance of a sequence to itself
                set_distance(i, i, 0.0)

                # make blast database, only adding sequences that have not yet been compared
                records = [gb[key2] for j, key2 in enumerate(seq_keys) if j > i]
                with open(blast_db_file, 'w') as output_handle:
                    SeqIO.write(records, output_handle, 'fasta')

                if records:
                    # blast query against blast_db
                    blastn_cmd = NcbiblastnCommandline(query=query_file, subject=blast_db_file,
                                                       out=blast_xml_file, outfmt=5)
                    blastn_cmd()

                    # parse blast output; the k-th blast record is taken to belong to
                    # column i + 1 + k, i.e. records are assumed to follow the order of
                    # the subject file -- TODO confirm for the BLAST version in use
                    length1 = len(record1.seq)
                    with open(blast_xml_file, 'r') as blastn_xml:
                        for j, blast_record in enumerate(NCBIXML.parse(blastn_xml), i + 1):
                            for alignment in blast_record.alignments:
                                length2 = alignment.length
                                # first check if length similarity threshold met
                                similar_length = (length1 < length2 * (1 + threshold)) and \
                                    (length1 > length2 * (1 - threshold))
                                # loop through each high-scoring segment pair (HSP);
                                # the last HSP written wins
                                for hsp in alignment.hsps:
                                    # e-value for a usable hit, 50.0 if the lengths differ too much
                                    distance = hsp.expect if similar_length else 50.0
                                    set_distance(i, j, distance)
                                    set_distance(j, i, distance)
            # update status
            percent = str(round(100 * len(already_compared)/float(len(seq_keys)), 2))
            sys.stdout.write('\r' + color.blue + 'Completed: ' + color.red + str(len(already_compared)) + '/' + str(len(seq_keys)) + ' (' + percent + '%)' + color.done)
            sys.stdout.flush()
        # done looping through all keys, now clean up whatever temporary files exist
        for temp_file in (blast_db_file, blast_xml_file, query_file, "subject" + process_num + ".fasta"):
            if os.path.exists(temp_file):
                os.remove(temp_file)
コード例 #30
0
ファイル: distancematrix.py プロジェクト: valgaze/sumac
    def distance_matrix_worker_old(self, seq_keys, length_threshold, dist_matrix, already_compared, lock, process_num, gb_dir):
        """
        Worker process for make_distance_matrix(). Takes a list "already_compared" of sequences that have
        already had all pairwise comparisons. Each worker process will work making pairwise comparisons
        for a different sequence, adding them to the "already_compared" list as they are completed.

        This variant runs one BLAST per pair of sequences. For every claimed row i and every column j
        whose cell still holds the "not yet compared" value 99, the cell (and its mirror) is set to:
        0.0 when both keys are equal, 50.0 when the length similarity threshold is not met, the
        e-value of the first HSP of the first alignment when BLAST finds a hit, and 10.0 when BLAST
        reports no alignments.

        seq_keys          -- ordered keys into the GenBank index; position == row/column in dist_matrix
        length_threshold  -- allowed relative difference in sequence length (converted with float())
        dist_matrix       -- matrix updated one whole row at a time
                             (presumably a multiprocessing manager proxy -- TODO confirm against caller)
        already_compared  -- list of keys already claimed by some worker, shared between workers
        lock              -- guards the check-and-append on already_compared
        process_num       -- used to give this worker its own temporary file names
        gb_dir            -- directory containing gb.idx
        """
        def set_distance(row_index, col_index, value):
            # read the row, change one cell, store the whole row back
            row = dist_matrix[row_index]
            row[col_index] = value
            dist_matrix[row_index] = row

        # each process must load its own sqlite gb
        gb = SeqIO.index_db(gb_dir + "/gb.idx")
        process_num = str(process_num)
        query_file = 'query' + process_num + '.fasta'
        subject_file = 'subject' + process_num + '.fasta'
        blast_xml_file = 'blast' + process_num + '.xml'
        threshold = float(length_threshold)
        color = Color()
        for i, key in enumerate(seq_keys):
            # check whether another process is already comparing this row
            with lock:
                compare_row = key not in already_compared
                if compare_row:
                    already_compared.append(key)
            if compare_row:
                # get the sequence record to compare; it is the BLAST subject for
                # every comparison in this row, so it must go to the subject file
                # (the query file is rewritten for each record2 below)
                record1 = gb[key]
                length1 = len(record1.seq)
                with open(subject_file, 'w') as output_handle:
                    SeqIO.write(record1, output_handle, 'fasta')
                for j, key2 in enumerate(seq_keys):
                    # only calculate e-values for pairs that have not yet been compared
                    if dist_matrix[i][j] != 99:
                        continue
                    if key == key2:
                        set_distance(i, j, 0.0)
                        continue
                    # check sequence lengths
                    record2 = gb[key2]
                    length2 = len(record2.seq)
                    similar_length = (length2 < length1 * (1 + threshold)) and \
                        (length2 > length1 * (1 - threshold))
                    if not similar_length:
                        # set distance to 50.0 if length similarity threshold not met
                        set_distance(i, j, 50.0)
                        set_distance(j, i, 50.0)
                        continue

                    # do the blast comparison
                    with open(query_file, 'w') as output_handle:
                        SeqIO.write(record2, output_handle, 'fasta')
                    blastn_cmd = NcbiblastnCommandline(query=query_file, subject=subject_file,
                                                       out=blast_xml_file, outfmt=5)
                    blastn_cmd()
                    with open(blast_xml_file, 'r') as blastn_xml:
                        for blast_record in NCBIXML.parse(blastn_xml):
                            if blast_record.alignments:
                                if blast_record.alignments[0].hsps:
                                    # blast hit found, set distance to e-value
                                    evalue = blast_record.alignments[0].hsps[0].expect
                                    set_distance(i, j, evalue)
                                    set_distance(j, i, evalue)
                            else:
                                # no blast hit found, set distance to default 10.0
                                set_distance(i, j, 10.0)
                                set_distance(j, i, 10.0)
            # update status
            percent = str(round(100 * len(already_compared)/float(len(seq_keys)), 2))
            sys.stdout.write('\r' + color.blue + 'Completed: ' + color.red + str(len(already_compared)) + '/' + str(len(seq_keys)) + ' (' + percent + '%)' + color.done)
            sys.stdout.flush()
        # done looping through all keys, now clean up; a worker that never claimed
        # a row or never ran BLAST has not created every file
        for temp_file in (blast_xml_file, query_file, subject_file):
            if os.path.exists(temp_file):
                os.remove(temp_file)