コード例 #1
0
ファイル: io.py プロジェクト: lcwheeler/clust
def log(msg=None, addextrastick=True):
    """Append a message to the log file and echo it on the console.

    The log file path is read from ``glob.logfile``. The file is opened in
    append mode on every call and a newline is always written after the
    message.

    Args:
        msg: Text to record. When it is ``None`` at write time, an empty
            line is written to the file and printed on the console.
        addextrastick: When true, ``msg`` is first passed through
            ``op.msgformated(msg, withnewline=False)`` and the result is
            what gets logged.
    """
    if addextrastick:
        # NOTE(review): msg is forwarded even when it is None; how
        # op.msgformated treats None is not visible here -- confirm.
        msg = op.msgformated(msg, withnewline=False)

    # Normalise once so the file and the console always receive the same
    # text (previously the console showed the literal "None" for a None msg
    # while the file received an empty line).
    text = '' if msg is None else msg

    with open(glob.logfile, mode='a+') as f:
        f.write(text)
        f.write('\n')

    print(text)
コード例 #2
0
ファイル: __main__.py プロジェクト: lcwheeler/clust
def _build_header():
    """Return the framed banner text used as the argparse description."""
    citationtxt = 'When publishing work that uses Clust, please cite:\n' \
                  'Basel Abu-Jamous and Steven Kelly (2018) Clust: automatic extraction of optimal co-expressed ' \
                  'gene clusters from gene expression data. Genome Biology 19:172; ' \
                  'doi: https://doi.org/10.1186/s13059-018-1536-8.'

    headertxt = op.topline()
    # Adjacent literals are concatenated before .format() is applied, so the
    # '{0}' placeholder in the 'Version' line receives the version value.
    headertxt += op.msgformated(
        'Clust\n'
        'Optimised consensus clustering of multiple heterogeneous datasets\n'
        'Version {0}\n'
        '\n'
        'By Basel Abu-Jamous\n'
        'Department of Plant Sciences\n'
        'The University of Oxford\n'
        '*****@*****.**'.format(version), '^')
    headertxt += op.midline()
    headertxt += op.msgformated('Citation\n' '~~~~~~~~', '^')
    headertxt += op.msgformated(citationtxt, '<')
    headertxt += op.midline()
    headertxt += op.msgformated(
        'Full description of usage can be found at:\n'
        'https://github.com/BaselAbujamous/clust', '<')
    headertxt += op.bottomline()
    return headertxt


def _build_parser(headertxt):
    """Return the command-line parser, with ``headertxt`` as its description."""
    # RawTextHelpFormatter keeps the line breaks of the pre-formatted banner.
    parser = argparse.ArgumentParser(description=headertxt,
                                     formatter_class=RawTextHelpFormatter)
    parser.add_argument('datapath',
                        help='Data file path or directory with data file(s).',
                        default=None)
    parser.add_argument(
        '-n',
        metavar='<file or int>',
        help='Normalisation file or list of codes (default: 1000)',
        default=['1000'],
        nargs='+')
    parser.add_argument('-r',
                        metavar='<file>',
                        help='Replicates structure file',
                        default=None)
    parser.add_argument('-m',
                        metavar='<file>',
                        help='OrthoGroups (OGs) mapping file',
                        default=None)
    parser.add_argument('-o',
                        metavar='<directory>',
                        help='Output directory',
                        default=None)
    parser.add_argument('-t',
                        metavar='<real number>',
                        type=float,
                        help='Cluster tightness (default: 1.0).',
                        default=1.0)
    parser.add_argument(
        '-basemethods',
        metavar='<string>',
        nargs='+',
        help='One or more base clustering methods (default: k-means)',
        default=None)
    parser.add_argument(
        '-K',
        metavar='<integer>',
        type=int,
        nargs='+',
        help='K values, e.g. 2 4 6 10 ... (default: 4 to 20 (step=4))',
        default=list(range(4, 21, 4)))
    parser.add_argument('-s',
                        metavar='<real number>',
                        type=float,
                        help='Outlier standard deviations (default: 3.0)',
                        default=3.0)
    parser.add_argument(
        '-d',
        metavar='<integer>',
        type=int,
        help='Min datasets in which a gene must exist (default: 1)',
        default=1)
    parser.add_argument(
        '-fil-v',
        metavar='<real number>',
        dest='filv',
        type=float,
        help='Filtering: gene expression threshold (default: -inf)',
        default=-float("inf"))
    parser.add_argument('-fil-c',
                        metavar='<integer>',
                        dest='filc',
                        type=int,
                        help='Filtering: number of conditions (default: 0)',
                        default=0)
    # The help text now states the real default (0); it used to claim 1.
    parser.add_argument('-fil-d',
                        metavar='<integer>',
                        dest='fild',
                        type=int,
                        help='Filtering: number of datasets (default: 0)',
                        default=0)
    parser.add_argument('--fil-abs',
                        dest='absval',
                        action='store_true',
                        help='Filter using absolute values of expression')
    parser.add_argument(
        '--fil-perc',
        dest='filperc',
        action='store_true',
        help='-fil-v is a percentile of genes rather than raw value')
    # --fil-flat / --no-fil-flat share one destination; the effective default
    # comes from set_defaults() below.
    parser.add_argument(
        '--fil-flat',
        dest='filflat',
        action='store_true',
        help='Filter out genes with flat expression profiles (default)')
    parser.add_argument('--no-fil-flat',
                        dest='filflat',
                        action='store_false',
                        help='Cancels the default --fil-flat option')
    parser.add_argument('-cs',
                        metavar='<integer>',
                        type=int,
                        help='Smallest cluster size (default: 11)',
                        default=11)
    # Previously two adjacent literals ('Q3' 's ...') silently dropped the
    # apostrophe from the rendered help.
    parser.add_argument('-q3s',
                        metavar='<real number>',
                        type=float,
                        help="Q3's defining outliers (default: 2.0)",
                        default=2.0)
    parser.add_argument('--no-optimisation',
                        dest='optimisation',
                        action='store_false',
                        help='Skip cluster optimisation & completion')
    parser.add_argument(
        '--deterministic',
        dest='deterministic',
        action='store_true',
        help='Obsolete. All steps are already deterministic (v1.7.4+)')
    parser.add_argument('-np',
                        metavar='<integer>',
                        type=int,
                        help='Number of parallel processes (default: 1)',
                        default=1)
    parser.set_defaults(optimisation=True,
                        deterministic=False,
                        absval=False,
                        filperc=False,
                        filflat=True)
    return parser


def main(args=None):
    """Parse the command line and run the clust pipeline.

    Args:
        args: List of command-line tokens. When ``None``, ``sys.argv[1:]``
            is used. An empty list prints the help text; argparse's help
            action then exits the program.
    """
    if args is None:
        args = sys.argv[1:]

    parser = _build_parser(_build_header())

    if not args:
        # Nothing on the command line: show the usage banner instead of
        # failing on the missing positional argument.
        parser.parse_args(['-h'])

    args = parser.parse_args(args)

    # --fil-perc switches how the -fil-v threshold is labelled downstream.
    filtype = 'perc' if args.filperc else 'raw'

    if args.basemethods is not None:
        # Each method name is wrapped in its own single-element list before
        # being handed to the pipeline.
        args.basemethods = [[m] for m in args.basemethods]

    # Arguments are passed positionally; the order must match the
    # clustpipeline signature.
    clustpipeline.clustpipeline(args.datapath, args.m, args.r, args.n, args.o,
                                args.K, args.t, args.s, args.d, args.filv,
                                args.filc, args.fild, args.absval, filtype,
                                args.filflat, args.cs, args.np,
                                args.optimisation, args.q3s, args.basemethods,
                                args.deterministic)