def main():
    """Run the pal_aggr command-line workflow.

    Steps, in order:

    1. Check the license status.
    2. Parse the command-line arguments (``-fl`` is required).
    3. Configure logging: DEBUG and above go to a time-stamped log file in
       the current directory, INFO and above are echoed to stdout (or to
       ``os.devnull`` when ``--silent`` is given).
    4. For every non-comment row of the tab-delimited ``-fl`` file, take the
       first column as the sample name and the second as the input path, run
       ``license.np_polya_aggr`` on it and write the results into a
       directory named after the sample: ``tx_polyA.csv``, ``gn_polyA.csv``,
       ``tx.pickle``, ``gn.pickle`` and ``annotation.pickle``.

    Rows whose first field starts with ``#`` are treated as comments, and
    blank rows are skipped.
    """
    # Check license
    license.check_status()

    # Setup of argparse for script arguments
    class LicenseAction(argparse.Action):
        """Print the license status and exit as soon as the flag is parsed."""

        def __call__(self, parser, namespace, values, option_string=None):
            print("License status: %s" % license.message())
            sys.exit()

    parser = argparse.ArgumentParser(
        description="Post-processing of 'nanopolish polya' output file with "
                    "annotation-based data aggregation and downstream analysis.",
        prog="pal_aggr.py")
    # Detach the default optional group and re-append it after the
    # 'required arguments' group, so the required group comes first in the
    # parser's group list.
    optional = parser._action_groups.pop()
    required = parser.add_argument_group('required arguments')
    required.add_argument(
        "-fl", type=str, default=None, metavar="path_file",
        help="text file of sample name and nanopolish polya output paths",
        required=True)
    optional.add_argument(
        "-t", type=int, default=1, metavar='num_threads',
        help='specify number of threads to use, default = 1')
    optional.add_argument(
        "-qc_filter", action='store_true',
        help='process only reads with qc_tag = PASS')
    optional.add_argument(
        "--silent", action='store_true',
        help='run script without terminal outputs')
    optional.add_argument(
        "-l", "--license", action=LicenseAction, metavar="", nargs=0,
        help='show license status and exit')
    parser._action_groups.append(optional)
    args = parser.parse_args()

    # Preparing logging console for __main__
    time_stamp = str(datetime.datetime.now())
    log_name = ('npd-rnaseq-tlk.poly_aggr.'
                + time_stamp.replace(" ", "_") + '.log')
    logging.basicConfig(filename=log_name,
                        level=logging.DEBUG,
                        format='%(asctime)s\t%(name)-12s\t%(message)s',
                        filemode='w')
    logger = logging.getLogger('pal_aggr')
    logger.debug('pal_aggr.py version: %s', version.__version__)
    logger.debug('Input command: python pal_aggr.py %s', " ".join(sys.argv))

    # Defining Handler to write messages to sys.stdout
    if args.silent:
        # stdout is redirected for the remainder of the process, so the
        # devnull handle is intentionally left open.
        sys.stdout = open(os.devnull, 'w')
    console = logging.StreamHandler(sys.stdout)
    console.setLevel(logging.INFO)
    console.setFormatter(logging.Formatter('[%(asctime)s] %(message)s',
                                           datefmt='%y-%m-%d %H:%M:%S'))
    logging.getLogger('').addHandler(console)

    logger.info(
        'Begin aggregation of nanopolish polyA data with npd-rnaseq-tlk ver=%s',
        version.__version__)
    logger.info('Number of threads: %i', args.t)

    # The context manager guarantees the sample sheet is closed even if a
    # run fails part-way through.
    with open(args.fl, 'r') as path_file:
        for run in csv.reader(path_file, delimiter='\t'):
            # csv.reader yields an empty list for a blank line; skip those
            # (and rows made only of empty fields) instead of crashing.
            if not any(field.strip() for field in run):
                continue
            if run[0].startswith('#'):
                continue
            sample = run[0]
            path = run[1]

            logger.info('Creating directory for sample: %s', sample)
            if not os.path.isdir(sample):
                # makedirs also handles nested sample names such as 'a/b'.
                os.makedirs(sample)

            logger.info('Starting run...')
            tx_pd, tx_aggr, gn_pd, gn_aggr, annot = license.np_polya_aggr(
                path, args.t, args.qc_filter, args.silent)

            # Saving results
            logger.info('Saving results...')
            # NOTE(review): tx_pd / gn_pd are presumably pandas DataFrames
            # (they expose to_csv) - confirm against np_polya_aggr.
            tx_pd.to_csv(os.path.join(sample, 'tx_polyA.csv'), index=False)
            gn_pd.to_csv(os.path.join(sample, 'gn_polyA.csv'), index=False)

            # Store data (serialize)
            pickled_outputs = (
                ('tx.pickle', tx_aggr),
                ('gn.pickle', gn_aggr),
                ('annotation.pickle', annot),
            )
            for file_name, payload in pickled_outputs:
                with open(os.path.join(sample, file_name), 'wb') as handle:
                    pickle.dump(payload, handle,
                                protocol=pickle.HIGHEST_PROTOCOL)

    logger.info('All runs are complete!')
# Import modules import sys import argparse import license if __name__ == '__main__': # Check license license.check_status() # Setup of argparse for script arguments class licenseAction(argparse.Action): def __call__(self, parser, namespace, values, option_string=None): print("License status: %s" % license.message()) sys.exit() parser = argparse.ArgumentParser( description="create gff database for exonfeat analysis", prog="tx_db.py") optional = parser._action_groups.pop() required = parser.add_argument_group('required arguments') required.add_argument("-gff", type=str, default=None, metavar="<gff_file>", help="specify path to gff file", required=True) required.add_argument("-fa", type=str, default=None, metavar="<fasta_file>", help="specify path to the fasta file",
def main():
    """Run the pal_diff command-line workflow.

    Steps, in order:

    1. Check the license status.
    2. Parse the command-line arguments. Exactly one of ``-run`` (path to a
       setup file) or ``-sf`` (write a setup-file template) must be given.
    3. With ``-sf``: write the header row of the template to
       ``pal_diff_setup_file.txt`` in the current directory and return.
    4. With ``-run``: configure logging (DEBUG and above to a time-stamped
       log file, INFO and above to stdout, or to ``os.devnull`` when
       ``--silent`` is given), parse the tab-delimited setup file and hand
       the parsed comparisons to ``license.pal_diff_setnrun``.

    Each setup-file row supplies, in order: comparison name, condition 1 and
    condition 2 (comma-separated lists), test type, read cutoff (int) and
    FDR cutoff (float). Rows whose first field starts with ``#`` are treated
    as comments, and blank rows are skipped.
    """
    # Check license
    license.check_status()

    # Setup of argparse for script arguments
    class LicenseAction(argparse.Action):
        """Print the license status and exit as soon as the flag is parsed."""

        def __call__(self, parser, namespace, values, option_string=None):
            print("License status: %s" % license.message())
            sys.exit()

    # The trailing spaces matter: adjacent literals are concatenated as-is,
    # and without them the help text ran words together.
    parser = argparse.ArgumentParser(
        description="Performs statistical analysis for differential polyA length "
                    "distributions between multiple conditions at the transcript "
                    "and gene level.",
        prog="pal_diff.py")
    optional = parser._action_groups.pop()
    # One of -run / -sf is mandatory; without required=True a bare invocation
    # fell through to open(None) and crashed with a TypeError.
    required = parser.add_mutually_exclusive_group(required=True)
    required.add_argument(
        "-run", type=str, default=None, metavar="setup_file",
        help="path to text file specifying comparisons to perform, "
             "refer to wiki for formatting")
    required.add_argument(
        "-sf", action='store_true',
        help='outputs setup_file template')
    optional.add_argument(
        "--silent", action='store_true',
        help='use with run option, runs script without terminal outputs')
    optional.add_argument(
        "-l", "--license", action=LicenseAction, metavar="", nargs=0,
        help='show license status and exit')
    parser._action_groups.append(optional)
    args = parser.parse_args()

    if args.sf:
        # Write the template header and stop; the context manager makes sure
        # the row is flushed and the file closed.
        with open('pal_diff_setup_file.txt', 'w') as template:
            csv.writer(template, delimiter='\t').writerow(
                ['#comparison_name', 'condition_1', 'condition_2',
                 'test_type', 'read_cutoff', 'fdr_cutoff'])
        return

    # Preparing logging console for __main__
    time_stamp = str(datetime.datetime.now())
    log_name = ('npd-rnaseq-tlk.poly_diff.'
                + time_stamp.replace(" ", "_") + '.log')
    logging.basicConfig(filename=log_name,
                        level=logging.DEBUG,
                        format='%(asctime)s\t%(name)-12s\t%(message)s',
                        filemode='w')
    logger = logging.getLogger('pal_diff')
    logger.debug('pal_diff.py version: %s', version.__version__)
    logger.debug('Input command: python pal_diff.py %s', " ".join(sys.argv))

    # Defining Handler to write messages to sys.stdout
    if args.silent:
        # stdout is redirected for the remainder of the process, so the
        # devnull handle is intentionally left open.
        sys.stdout = open(os.devnull, 'w')
    console = logging.StreamHandler(sys.stdout)
    console.setLevel(logging.INFO)
    console.setFormatter(logging.Formatter('[%(asctime)s] %(message)s',
                                           datefmt='%y-%m-%d %H:%M:%S'))
    logging.getLogger('').addHandler(console)

    logger.info('pal_diff, %s', version.__version__)
    logger.info('Begin differential testing of tx and gene polyA length '
                'distributions between conditions...')

    # Parse setup file
    runs = {}
    with open(args.run, 'r') as setup_file:
        for line in csv.reader(setup_file, delimiter='\t'):
            # csv.reader yields an empty list for a blank line; skip those
            # (and rows made only of empty fields) instead of crashing.
            if not any(field.strip() for field in line):
                continue
            if line[0].startswith('#'):
                continue
            runs[line[0]] = {'cond1': line[1].split(','),
                             'cond2': line[2].split(','),
                             'test': line[3],
                             'read_co': int(line[4]),
                             'fdr_co': float(line[5])}

    # setup and run tests
    license.pal_diff_setnrun(runs)