Beispiel #1
0
    def processfiles(self):
        """Poll ``self.creates`` for completed FASTQ files, align them, and
        keep the on-disk target list up to date.

        Runs until ``self.running`` is cleared.  Each pass collects files
        whose creation time is old enough to consider them fully written,
        hands the batch to ``parse_fastq_file`` and, when new targets
        appear, rewrites the TOML config and notifies the sequencer
        (skipped in simulation mode).
        """
        self.logger.info("Process Files Initiated")
        self.counter = 0
        self.targets = []
        self.masterdf = pd.DataFrame(columns=['seqid', 'position', 'coverage'])

        while self.running:
            currenttime = time.time()
            fastqfilelist = list()
            # sorted() materialises the dict items up front, so deleting
            # from self.creates inside the loop is safe.
            for fastqfile, createtime in sorted(self.creates.items(),
                                                key=lambda x: x[1]):
                # Extra settle time before a file is considered complete;
                # 0 for live runs, may be raised for simulations.
                delaytime = 0
                if int(createtime) + delaytime < time.time():
                    self.logger.info(fastqfile)
                    del self.creates[fastqfile]
                    self.counter += 1
                    fastqfilelist.append(fastqfile)

            # Only align when there is new data; previously parse_fastq_file
            # was called with an empty list on every idle cycle.
            if fastqfilelist:
                targets, self.masterdf = parse_fastq_file(
                    fastqfilelist, self.args, logging, self.masterdf)
                # Debug prints replaced with logger calls.
                self.logger.debug(targets)
                self.logger.debug(self.targets)
                if len(targets) > len(self.targets):
                    updated_targets = set(targets) - set(self.targets)
                    update_message = "Updating targets with {}".format(
                        nice_join(updated_targets, conjunction="and"))
                    self.logger.info(update_message)
                    if not self.args.simulation:
                        send_message(self.connection, update_message,
                                     Severity.WARN)
                    write_new_toml(self.args, targets)
                    self.targets = targets.copy()

                if (self.masterdf.shape[0] > 0
                        and self.masterdf.shape[0] == len(self.targets)):
                    # Every target is covered at the desired coverage level.
                    self.logger.info(
                        "Every target is covered at at least {}x".format(
                            self.args.depth))
                    if not self.args.simulation:
                        self.connection.protocol.stop_protocol()
                        send_message(
                            self.connection,
                            "Iter Align has stopped the run as all targets should be covered by at least {}x"
                            .format(self.args.depth),
                            Severity.WARN,
                        )

            # Throttle the polling loop: if this pass took < 5 s, sleep.
            if currenttime + 5 > time.time():
                time.sleep(5)
Beispiel #2
0
         metavar="THREADS",
         help=
         "Set the number of default threads to use for threaded tasks (default {})"
         .format(DEFAULT_CORES),
         default=DEFAULT_CORES,
         type=int,
     ),
 ),
 (
     "--log-level",
     dict(
         metavar="LOG-LEVEL",
         action="store",
         default="info",
         choices=LOG_LEVELS,
         help="One of: {}".format(nice_join(LOG_LEVELS)),
     ),
 ),
 (
     "--log-format",
     dict(
         metavar="LOG-FORMAT",
         action="store",
         default=DEFAULT_LOG_FORMAT,
         help="A standard Python logging format string (default: {!r})".
         format(DEFAULT_LOG_FORMAT.replace("%", "%%")),
     ),
 ),
 (
     "--log-file",
     dict(
Beispiel #3
0
     "--experiment-name",
     dict(
         metavar="EXPERIMENT-NAME",
         type=str,
         help="Describe the experiment being run, enclose in quotes",
         required=True,
     ),
 ),
 (
     "--read-cache",
     dict(
         metavar="READ_CACHE",
         action="store",
         default=DEFAULT_READ_CACHE,
         choices=READ_CACHE,
         help="One of: {} (default: {})".format(nice_join(READ_CACHE),
                                                DEFAULT_READ_CACHE),
     ),
 ),
 (
     "--workers",
     dict(
         metavar="WORKERS",
         type=int,
         help="Number of worker threads (default: {})".format(
             DEFAULT_WORKERS),
         default=DEFAULT_WORKERS,
     ),
 ),
 (
     "--channels",
    def processfiles(self):
        """Poll ``self.creates`` for completed FASTQ files, classify reads
        against downloaded reference genomes and update the target list.

        If ``--references`` was given, the named reference genomes are
        downloaded and indexed before the watch loop starts so the first
        batch of reads can be mapped immediately.  Runs until
        ``self.running`` is cleared.
        """
        self.logger.info("Process Files Initiated")
        self.counter = 1
        self.targets = []
        self.masterdf = pd.DataFrame(columns=['seqid', 'position', 'coverage'])
        self.taxid_entries = 0
        self.downloaded_set = set()
        self.length_dict = {}
        self.coverage_sum = {}

        if self.args.references:
            logging.info("References argument provided. Will download references genomes.")
            self.downloaded_set = set(self.args.references)
            logging.info(self.downloaded_set)
            self.url_list = url_list_generation(self.args, self.args.references)
            self.length_dict.update(
                download_references(self.args, self.url_list, self.downloaded_set))
            generate_mmi(self.args, self.counter)

        while self.running:
            currenttime = time.time()
            fastqfilelist = list()
            # sorted() materialises the dict items up front, so deleting
            # from self.creates inside the loop is safe.
            for fastqfile, createtime in sorted(self.creates.items(), key=lambda x: x[1]):
                # Extra settle time before a file is considered complete;
                # 0 for live runs, may be raised for simulations.
                delaytime = 0
                if int(createtime) + delaytime < time.time():
                    self.logger.info(fastqfile)
                    del self.creates[fastqfile]
                    self.counter += 1
                    fastqfilelist.append(fastqfile)

            # Only parse when the watch directory produced new files.
            if fastqfilelist:
                # Debug prints replaced with logger calls.
                self.logger.debug(self.downloaded_set)
                targets, self.downloaded_set, self.taxid_entries, self.coverage_sum = parse_fastq_file(
                    fastqfilelist, self.args, logging, self.length_dict,
                    self.downloaded_set, self.taxid_entries,
                    self.coverage_sum, self.connection)
                self.logger.debug(targets)
                self.logger.debug(self.targets)

                if len(targets) > len(self.targets):
                    updated_targets = set(targets) - set(self.targets)
                    update_message = "Updating targets with {}".format(
                        nice_join(updated_targets, conjunction="and"))
                    self.logger.info(update_message)
                    if not self.args.simulation:
                        send_message(self.connection, update_message, Severity.WARN)
                    write_new_toml(self.args, targets)
                    self.targets = targets.copy()

                # NOTE(review): masterdf is never updated in this variant, so
                # this stop condition appears unreachable — confirm whether it
                # should track per-target coverage like the alignment variant.
                if self.masterdf.shape[0] > 0 and self.masterdf.shape[0] == len(self.targets):
                    # Every target is covered at the desired coverage level.
                    self.logger.info(
                        "Every target is covered at at least {}x".format(self.args.depth))
                    if not self.args.simulation:
                        self.connection.protocol.stop_protocol()
                        send_message(
                            self.connection,
                            "Iter Align has stopped the run as all targets should be covered by at least {}x".format(
                                self.args.depth),
                            Severity.WARN)

            # BUG FIX: this throttle used to sit inside `if fastqfilelist:`,
            # so an empty watch directory made the while-loop busy-wait at
            # 100% CPU.  It belongs at loop level, as in the sibling variant.
            if currenttime + 5 > time.time():
                time.sleep(5)
Beispiel #5
0
     dict(
         metavar="EXPERIMENT-NAME",
         type=str,
         help="Describe the experiment being run, enclose in quotes",
         required=True,
     ),
 ),
 (
     "--read-cache",
     dict(
         metavar="READ_CACHE",
         action="store",
         default=DEFAULT_READ_CACHE,
         choices=READ_CACHE,
         help="One of: {} (default: {})".format(
             nice_join(READ_CACHE), DEFAULT_READ_CACHE
         ),
     ),
 ),
 (
     "--workers",
     dict(
         metavar="WORKERS",
         type=int,
         help="Number of worker threads (default: {})".format(DEFAULT_WORKERS),
         default=DEFAULT_WORKERS,
     ),
 ),
 (
     "--channels",
     dict(