Example #1
def summarize_hits(f):
    f = Path(f)
    cs = f.basename().split('.')
    query = cs[0]

    outfile = f.dirname() / (f.basename() + '.tsv')
    alns = parse(open(f).read())

    if not alns:
        print('No content: ' + f)
        return None
    print(outfile)
    with open(outfile, 'w') as csvfile:
        writer = csv.writer(csvfile, delimiter='\t')

        for each in alns:
            q, t, e = each[0][:3]

            tid, tseq, ts, te = t
            qid, qseq, qs, qe = q

            qid = qid.split()[0].replace('lcl|', '')
            tid = tid.split()[0].replace('lcl|', '')

            if tid.startswith('UniRef'):
                continue
            if tid.startswith('pfam'):
                continue
            if tid.startswith('up_'):
                continue

            writer.writerow((tid, e))
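Note: every example on this page uses the third-party path package (path.py), whose Path is a str subclass with method-style accessors such as basename(), dirname() and stripext(); none of these exist on stdlib pathlib.Path. A minimal sketch of the correspondence, assuming `pip install path` (filenames illustrative):

from path import Path
import pathlib

f = Path('/data/query.fasta.ssearch')
f.basename()    # Path('query.fasta.ssearch')  -> pathlib: pathlib.Path(f).name
f.dirname()     # Path('/data')                -> pathlib: pathlib.Path(f).parent
f.stripext()    # Path('/data/query.fasta')    -> pathlib: .with_suffix('')
out = f.dirname() / (f.basename() + '.tsv')    # the join pattern used in Example #1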
Example #2
def __get_source_info_base(source: Path) -> SourceInfo:
    return SourceInfo(
        dir_name=source.dirname(),
        base_name=source.basename(),
        size=source.getsize(),
        mtime=int(source.getmtime()),
    )
Example #3
def download_file(downloaded_file_path,
                  url,
                  max_connections=2,
                  max_concurrent=5):
    """Download file to specified location."""
    file_path = Path(downloaded_file_path)
    assert file_path.isabs(), "download file path must be absolute, not relative"
    if file_path.exists():
        log("{} already downloaded".format(file_path))
        return

    log("Downloading: {}\n".format(url))
    aria2c = Command("aria2c")
    aria2c = aria2c("--max-connection-per-server={}".format(max_connections))
    aria2c = aria2c("--max-concurrent-downloads={}".format(max_concurrent))

    try:
        aria2c("--dir={}".format(file_path.dirname()),
               "--out={}.part".format(file_path.basename()), url).run()
    except CommandError:
        raise DownloadError(
            "Failed to download {}. Re-running may fix the problem.".format(
                url))

    shutil.move(file_path + ".part", file_path)
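Note: Example #3 stages the download as `<name>.part` and only moves it onto the final path after aria2c succeeds, so an interrupted transfer never leaves a truncated file where callers expect a complete one. A hedged stdlib-only sketch of the same stage-then-publish pattern (no aria2c features; URL handling simplified):

import shutil
import urllib.request
from pathlib import Path

def download_file_sketch(url: str, dest: Path) -> None:
    """Fetch url into dest via a .part staging file (sketch only)."""
    if dest.exists():
        return
    part = dest.with_name(dest.name + '.part')
    with urllib.request.urlopen(url) as resp, open(part, 'wb') as out:
        shutil.copyfileobj(resp, out)   # stream into the staging file
    shutil.move(str(part), str(dest))   # publish only after a complete fetch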
Example #4
def _get_best_models(args):
    outdir, tag, group, models_list = args
    outdir = Path(outdir)
    logfile = models_list[0][1].dirname().joinpath(
        '{complex_id}-min.log'.format(complex_id=tag))

    _setup_logger(logfile)

    tag = int(tag)
    try:
        best_idx = group.sort_values('prediction', ascending=False).index[0]
        best_path = models_list[best_idx][1]

        with open(best_path, 'r') as f:
            lines = f.readlines()
        best_path.write_lines(lines + ['END\n'])

        best_nmin, best_psf = prepare_pdb22(best_path, best_path.stripext())
        minimized, nmin, psf = minimization.minimize_energy(
            best_nmin,
            out=best_nmin[:-8] + 'min.pdb',
            psf=best_psf,
            clean=True)
        minimized, nmin, psf = Path(minimized), Path(nmin), Path(psf)
        minimized.move(outdir)
        nmin.move(outdir)
        psf.move(outdir)
        return tag, outdir.joinpath(minimized.basename())
    except Exception as e:
        logging.error('Error minimizing model for line %i' % tag)
        logging.exception(e)
        return tag, None
Example #5
def cache_document(src, dest=None):
    "Cache a document, return filename of the dest file."

    # TODO: use a staging area in case something breaks in the middle of adding.
    src = Path(src)

    if dest is None:
        # Find a reasonable filename if dest isn't specified
        if is_url(src):
            dest = CACHE / secure_filename(src)
        else:
            dest = CACHE / src.basename()
    else:
        dest = CACHE / dest

    if dest.exists():
        # TODO: Suggest update methods or renaming the file
        raise SkidFileExists(dest)

    if is_url(src):
        cache_url(src, dest)

    elif src.exists():  # is this something on disk?
        src.copy2(dest)
        print('copy:', src, '->', dest)

    else:
        raise SkidError(
            "cache_document doesn't know what to do with source %r\n"
            "Trying to add a nonexistent file?" % str(src))

    return dest
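Note: a hypothetical call pattern for cache_document (CACHE, cache_url, secure_filename and the Skid* exceptions come from the enclosing module; paths are illustrative):

cache_document('/tmp/paper.pdf')                  # copied to CACHE / 'paper.pdf'
cache_document('http://example.com/paper.pdf')    # fetched to CACHE / secure_filename(url)
cache_document('/tmp/paper.pdf', dest='p2.pdf')   # explicit name: CACHE / 'p2.pdf'
cache_document('/tmp/missing.pdf')                # raises SkidError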
Example #6
    def copyfiles(self, skip_sky=False, skip_pattern=False, skip_opt=False):
        d = self.tempdir

        if os.path.exists(self.scene):
            fn = Path(self.scene)
            fn.copy(d / fn.basename())
        else:
            fn = d / 'cscene.can'
            fn.write_text(self.scene)
        self.scene = Path(fn.basename())

        if not skip_sky:
            if os.path.exists(self.sky):
                fn = Path(self.sky)
                fn.copy(d / fn.basename())
            else:
                fn = d / 'sky.light'
                fn.write_text(self.sky)
            self.sky = Path(fn.basename())

        if not skip_pattern:
            if self.infinity:
                if os.path.exists(self.pattern):
                    fn = Path(self.pattern)
                    fn.copy(d / fn.basename())
                else:
                    fn = d / 'pattern.8'
                    fn.write_text(self.pattern)
                self.pattern = Path(fn.basename())

        if self.sensor is not None:
            if os.path.exists(self.sensor):
                fn = Path(self.sensor)
                fn.copy(d / fn.basename())
            else:
                fn = d / 'sensor.can'
                fn.write_text(self.sensor)
            self.sensor = Path(fn.basename())

        if not skip_opt:
            optn = [x + '.opt' for x in _safe_iter(self.optnames)]
            try:
                for i, opt in enumerate(_safe_iter(self.opticals)):
                    # _safe_iter avoids iterating over the characters of a
                    # single optfile name when only one optfile is given
                    if os.path.exists(opt):
                        # print opt
                        fn = Path(opt)
                        fn.copy(d / optn[i])
                    else:
                        fn = d / optn[i]
                        fn.write_text(opt)
                self.opticals = [Path(x) for x in _safe_iter(optn)]
            except IndexError:
                raise CaribuOptionError(
                    "Optnames list must be None or as long as optfiles list")
Example #7
    def build(self, dockerfile):
        dockerfile = Path(dockerfile)
        tag = 'rf-' + dockerfile.basename().replace('.dkf', '')
        dockerfile.copy('redfish-client/tests/Dockerfile')
        response = [line for line in self.cli.build(
            path='redfish-client/tests',
            tag=tag,
            rm=True)]
        return response
Example #8
    def build(self, dockerfile):
        dockerfile = Path(dockerfile)
        tag = 'rf' + dockerfile.basename().replace('Dockerfile.', '')
        dockerfile.copy('redfish-client/tests/Dockerfile')
        response = [line for line in self.cli.build(
            path='redfish-client/tests',
            tag=tag,
            rm=True)]
        return response
Example #10
    def add_file(self, file: Path):
        if file.exists() and file.ext == ".png":
            tmp = Image.open(file.abspath())
            size = tmp.size
            tmp.close()
            self.files.insert("",
                              "end",
                              text=file.basename(),
                              values=("{}x{}".format(*size),
                                      "{}".format(file.abspath())))
def main():
    parser = argparse.ArgumentParser(
        description='clean up a filename: replace dots with spaces and drop the '
                    'release-tag junk at the end, e.g. '
                    '"actual.file.name-1080p.BluRay.x264-GECKOS[rarbg]" -> "actual filename"')
    parser.add_argument('filenames', nargs='+', help='list of files and folders to cleanup')
    args = parser.parse_args()
    for f in args.filenames:
        p = Path(f.strip())
        newname = rename(p.basename(), p.isdir())
        p = p.rename(Path.joinpath(p.dirname(), newname))
        print(p.abspath(), end='')
Example #12
    def __init__(self, data_dir, transform, max_num_instances):
        self.transform = transform
        self.max_num_instances = max_num_instances
        
        img_dir = Path(data_dir)
        root = img_dir.dirname().parent
        seg_dir = Path(os.path.join(root, 'segmentation', img_dir.basename()))
        flo_dir = [Path(os.path.join(root, 'flow_f', img_dir.basename())),
                   Path(os.path.join(root, 'flow_b', img_dir.basename()))]

        intrinsics = np.genfromtxt(img_dir/'cam.txt').astype(np.float32).reshape((3, 3))
        imgs = sorted(img_dir.files('*.jpg'))
        flof = sorted(flo_dir[0].files('*.flo'))   # 00: src, 01: tgt
        flob = sorted(flo_dir[1].files('*.flo'))   # 00: tgt, 01: src
        segm = sorted(seg_dir.files('*.npy')) 
        
        sequence_set = []
        for i in range(len(imgs)-1):
            sample = {'intrinsics':intrinsics, 'img0':imgs[i], 'img1':imgs[i+1], 
                      'flof':flof[i], 'flob':flob[i], 'seg0':segm[i], 'seg1':segm[i+1]}   # will be processed when getitem() is called
            sequence_set.append(sample)
        self.samples = sequence_set
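Note: the sibling-directory arithmetic above implies a dataset layout roughly like the following (a sketch; the sequence name is a placeholder):

# <root>/
#   image/<seq>/         <- data_dir: *.jpg frames plus cam.txt intrinsics
#   segmentation/<seq>/  <- *.npy masks
#   flow_f/<seq>/        <- forward *.flo (src -> tgt)
#   flow_b/<seq>/        <- backward *.flo (tgt -> src)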
Example #13
File: gsea.py Project: mfiers/rat
def set2rank2(args):

    rnkfile = Path(args['rnkfile']).expanduser().abspath()
    gsetfile = Path(args['gsetfile']).expanduser().abspath()
    cachedir = Path(args['cachedir']).expanduser().abspath()
    gseajar = Path(args['gseajar']).expanduser().abspath()

    def fix_rv(rv):
        rv['gset'] = str(gsetfile.basename()).replace(".grp",
                                                      '').replace(".list", '')
        rv['gset_type'] = str(gsetfile.dirname().basename())
        rv['rank'] = str(rnkfile.basename()).replace(".rnk", '')
        rv['rank_type'] = str(rnkfile.dirname().basename())
        del rv['cachedir']
        return rv

    if os.path.exists(cachedir):
        rv = _check_gsea(cachedir)
        if rv is False:
            cachedir.rmtree()
        else:
            assert isinstance(rv, pd.DataFrame)
            return fix_rv(rv)

    cachedir.makedirs_p()

    if '.list' in gsetfile:
        grpfile = cachedir / (gsetfile.basename().replace('.list', '.grp'))
        os.symlink(gsetfile, grpfile)
        gsetfile = grpfile

    cl = ("""-cp %s 
            -Xmx2048m xtools.gsea.GseaPreranked 
            -gmx %s -collapse false 
            -mode Max_probe -norm meandiv 
            -nperm 1000 -rnk %s
            -scoring_scheme weighted -rpt_label my_analysis
            -include_only_symbols true
            -make_sets true -plot_top_x 1
            -rnd_seed timestamp -set_max 9999
            -set_min 4 -zip_report false
            -out %s -gui false """ %
          (gseajar, gsetfile, rnkfile, cachedir)).split()

    import sh

    try:
        sh.java(*cl)  # optionally: _out=str(cachedir / 'gsea.out'),
        #             _err=str(cachedir / 'gsea.err')
    except sh.ErrorReturnCode_1:
        return False

    return fix_rv(_check_gsea(cachedir))
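Note: building the java argument vector by %-formatting a whitespace-split template (the `cl = ("""...""" % ...).split()` above) silently breaks if any of the four paths contains a space. A hedged sketch that keeps each argument intact (same flags, abbreviated; identifiers as above):

cl = ['-cp', str(gseajar), '-Xmx2048m', 'xtools.gsea.GseaPreranked',
      '-gmx', str(gsetfile), '-collapse', 'false',
      '-nperm', '1000', '-rnk', str(rnkfile),
      '-out', str(cachedir), '-gui', 'false']  # remaining flags omitted for brevity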
Example #15
def summarize_ssearch_summary(f):
    f = Path(f)
    cs = f.basename().split('.')
    query = cs[0]

    outfile = f.dirname() / (f.basename() + '.tsv')

    r = []
    started = False

    with open(f) as ifh:
        for l in ifh:
            if l.startswith("The best scores are:"):
                started = True
                continue
            if started:
                cs = l.strip().split()
                if cs:
                    e = cs[-1]
                    tid = cs[0]
                    tid = tid.replace('lcl|', '')
                    if tid.startswith('UniRef'):
                        continue
                    if tid.startswith('pfam'):
                        continue
                    if tid.startswith('up_'):
                        continue
                    r.append([tid, e])
                else:
                    break

    if not r:
        print('No content: ' + f)
        return None

    print(outfile)
    with open(outfile, 'w') as csvfile:
        writer = csv.writer(csvfile, delimiter='\t')
        for row in r:
            writer.writerow(row)
Example #16
def load_test_results(directory: Path) -> TestResults:
    """Takes path to directory with test results generated by network-tests2 as
    argument. Returns an instance of TestResults generated from it"""
    hostnames = tuple(read_benchmark_hostnames(directory.joinpath("network_hosts.txt")))
    kwargs = {
        arg: load_nc_file_as_tables(hostnames, directory.joinpath(filename))
        for (arg, filename) in [
            ("medians", "network_median.nc"),
            ("means", "network_average.nc"),
            ("std_dev", "network_deviation.nc")
        ]
    }
    return TestResults(name=directory.basename(), hostnames=hostnames, **kwargs)
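Note: the dict comprehension maps each keyword-argument name to the table loaded from its file, then splats the result into the constructor; a stripped-down, self-contained sketch of the pattern (loader is a stand-in for load_nc_file_as_tables):

from pathlib import Path

def build_kwargs(loader, directory: Path):
    """Sketch: one keyword argument per data file, ready to splat with **."""
    return {arg: loader(directory / fname)
            for arg, fname in [('medians', 'network_median.nc'),
                               ('means', 'network_average.nc'),
                               ('std_dev', 'network_deviation.nc')]}
# usage in the example: TestResults(name=..., hostnames=..., **build_kwargs(...))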
Example #17
def IdentFile(dir_layout, fn, suffix):
    fn = Path(fn)
    if dir_layout == 'clean':
        lst = [fn.dirname() + '/_spiper/' + fn.basename(), suffix]
        # job_name, suffix]
        # input_ident_file = '{pre_dir}/_spiper/{pre_base}.{job_name}.{suffix}'.format(**locals())
    elif dir_layout == 'flat':
        lst = [fn, suffix]
        # lst = [fn,job_name,suffix]
    else:
        assert 0, ("dir_layout", dir_layout)
        # input_ident_file = '{prefix}.{job_name}.{suffix}'.format(**locals())
    input_ident_file = '.'.join(lst)
    return Path(input_ident_file)
Example #19
def main(results_path):
    results_path = Path(results_path)
    for csv_path in glob.glob(results_path / "**" / "*.csv", recursive=True):
        csv_path = Path(csv_path)
        csv_basename = csv_path.basename()[:-4]
        csv_parent = csv_path.parent
        data = pd.read_csv(csv_path)
        columns = data.columns
        no2_columns = [c for c in columns if "NO2" in c]
        o3_columns = [c for c in columns if "O3" in c]
        base_columns = set(columns) - set(o3_columns) - set(no2_columns) - {
            "Unnamed: 0"
        }
        with open(csv_parent / ("%s-no2.tex" % csv_basename), 'w') as fp:
            fp.write(data[list(base_columns) + no2_columns].to_latex())
        with open(csv_parent / ("%s-o3.tex" % csv_basename), 'w') as fp:
            fp.write(data[list(base_columns) + o3_columns].to_latex())
Example #20
    def save(self, list_path='grids.list', grid_ids=None):
        list_path = Path(list_path)
        grids_dx = {}
        with open(list_path, 'w') as f:
            f.write('%i\n' % self.grid.shape[0])
            for i, chan in enumerate(self.grid):
                if grid_ids is not None and i not in grid_ids:
                    continue
                if self.names is None:
                    name = list_path.basename().stripext() + f'.{i}.dx'
                else:
                    name = f'{self.names[i]}.dx'
                grid_dx = _Grid(grid=chan,
                                origin=self.origin,
                                delta=self.delta)
                grid_dx.export(list_path.dirname() / name)
                grids_dx[name] = name
                f.write(name + '\n')
Example #21
def bootstrap(ctx, branch, projectpath='', projectname='',
        taskspath='tasks',
        configpath='config',
        utilspath='utils',
        virtualenv=True,
        upgradereqs=False):

    cwd = Path.getcwd()

    if projectpath:
        projectpath = Path(projectpath)
        os.chdir(projectpath)
    elif INITIAL_PATH.basename() == 'tasks':
        projectpath = INITIAL_PATH.parent
        os.chdir(projectpath)
    else:
        projectpath = INITIAL_PATH

    if not projectname:
        projectname = str(projectpath.basename())
    Config.project_name = projectname

    Config.virtualenv = virtualenv

    # TODO: parse local.cfg to Config if exists?
    Config.get_tasks = True
    Config.get_config = True
    Config.requirements = True  # Install?

    get_tasks(ctx, taskspath)
    get_config(ctx, configpath, branch=branch)
    activate_virtualenv(ctx, projectname)
    install_requirements(ctx, upgrade=upgradereqs)

    clone(ctx, 'config/base.cfg')
    fetch(ctx)

    # SAO
    sao_install(ctx)
    os.chdir(cwd)
    sao_grunt(ctx)

    if Path.getcwd() != INITIAL_PATH:
        os.chdir(INITIAL_PATH)
Example #22
    def parse_hits(self, f, max_evalue=10.0):
        f = Path(f)
        query_name = f.basename().split('.seq')[0]
        query = self[query_name]

        if query.size == 1:
            return None

        lines = open(f).read().splitlines()

        if not lines:
            print('No content: ' + f)
            return None

        aln_num = 0
        hits = Hits()

        tids = set()
        for each in lines[::-1]:
            tid, e = each.split()
            e = float(e)
            tid = tid.split()[0].replace('lcl|', '')
            if query.id == tid:
                continue
            if e > max_evalue:
                continue
            if tid in tids:
                continue
            else:
                tids.add(tid)
            target = self[tid]
            hits.append(Hit(query, target, e, standard=self.standard,
                            db=getattr(self, "pair2JG_judge", None)))

        hits.seqlen = query.seqlen
        hits.id = query.id
        
        hits.superfamily = query.superfamily
        hits.fold = query.fold
        hits.family = query.family
        hits.size = query.size

        return hits
Example #23
def abyss(log, cfg, r1, r2, out):
    out = Path(out)
    dir = out.dirname()
    f1 = dir.relpathto(r1)
    f2 = dir.relpathto(r2)
    prefix = out.basename().split('-')[0]
    #print f1, f2, 'name=%s' % prefix, 'k=%s' % 25
    #cmd("in='%s %s'" % (f1, f2),'name=%s' % prefix, 'k=%s' % 25, C=dir, _err=log, _out=log)
    # cmd.wait()
    abyss_log = os.path.join(dir, "abyss.log")
    kmer = cfg.assembly.kmer
    cmd_template = r"abyss-pe in='{f1} {f2}' name={prefix} k={kmer} -C {dir} > {abyss_log} 2>&1"
    cmd = cmd_template.format(f1=f1,
                              f2=f2,
                              prefix=prefix,
                              kmer=kmer,
                              dir=dir,
                              abyss_log=abyss_log)
    log.write('\nRunning:\n{command}\nwriting log to {abyss_log}'.format(
        command=cmd, abyss_log=abyss_log))
    os.system(cmd)
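Note: Example #23 interpolates paths into a shell string and runs it with os.system, which breaks on spaces and quoting. A hedged sketch of the same invocation via subprocess (identifiers as above; abyss-pe is a make wrapper, so cwd=dir stands in for make's -C flag):

import subprocess

with open(abyss_log, 'w') as lf:
    subprocess.run(['abyss-pe', "in={} {}".format(f1, f2),
                    'name={}'.format(prefix), 'k={}'.format(kmer)],
                   cwd=dir, stdout=lf, stderr=subprocess.STDOUT, check=True)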
Example #24
def step_copytree_to_workdir(ctx, source_dir):
    """Copy a directory :param:`source_dir` -> workdir:basename(source_dir)/"""
    ensure_workdir_exists(ctx)
    source_dir = Path(source_dir).normpath()
    dest_dir = source_dir.basename()
    step_copytree_to_workdir_with_destdir(ctx, source_dir, dest_dir)
Example #25
def main():
    global SOURCE
    global OUTPUT
    global STYLE_FILE
    global STYLE_IMAGES
    global CHM

    parser = argparse.ArgumentParser()
    parser.add_argument('-s',
                        '--style',
                        required=True,
                        help="Style sheet for docs")
    parser.add_argument('docs_source', help="QT docs source directory")
    parser.add_argument('docs_out', help="Output directory for chm files")
    args = parser.parse_args()

    SOURCE = Path(args.docs_source).abspath()
    OUTPUT = Path(args.docs_out).abspath()
    STYLE_FILE = Path(args.style).abspath()

    with open(SOURCE / 'qtdoc' / 'qtdoc.index', encoding='utf-8') as r:
        qt_version = re.search(r'<INDEX.*version="(.*?)"', r.read())

        if not qt_version:
            raise RuntimeError("Failed to parse QT Docs version")

        qt_version = qt_version.group(1)

    STYLE_IMAGES = (
        "ico_out.png",
        "ico_note.png",
        "ico_note_attention.png",
        "btn_prev.png",
        "btn_next.png",
        "home.png",
        "arrow_bc.png",
        "bgrContent.png",
        "bullet_dn.png",
        "bullet_sq.png",
        "logo.png",
    )

    CHM = chm.DocChm(f'Qt-{qt_version}',
                     default_topic='qtdoc/index.html',
                     title=f'Qt {qt_version}')

    OUTPUT.mkdir_p()

    images_dir = OUTPUT / 'images'

    images_dir.mkdir_p()

    for image in STYLE_IMAGES:
        shutil.copy(SOURCE / 'qtdoc' / 'images' / image, images_dir / image)

    # put qtdoc first
    process_module(SOURCE / 'qtdoc')

    excluded_dirs = ('config', 'global', 'qtdoc')

    for module in SOURCE.dirs():
        if module.basename() not in excluded_dirs:
            process_module(module)

    shutil.copy(STYLE_FILE, OUTPUT / STYLE_FILE.basename())

    for image in STYLE_IMAGES:
        CHM.append(f"images\{image}")

    CHM.append(STYLE_FILE.basename())

    with OUTPUT:
        CHM.save()

    print(f"QT Docs v.{qt_version} are ready for CHM compilation")
Example #26
num_cpus = args.cpus 
ref_path = args.reference
patient_zero = args.reference_name

# fix fasta header names
fa_fps = bs.get_filepaths(seqs_dir, data_fmt='fasta', data_type='', tech='')
for fa_fp in fa_fps['IonXpress']:
    seq = SeqIO.read(fa_fp, 'fasta')
    seq.id = fa_fp.split('/')[-1].split('.')[0]
    SeqIO.write(seq, fa_fp, 'fasta')
msa_dir = out_dir / 'msa'
if not msa_dir.isdir():
    msa_dir.mkdir()
# copy reference sequence into folder containing fasta files
copy(ref_path, seqs_dir)
seqs_fp = bs.concat_fasta(seqs_dir, msa_dir / out_dir.basename())
msa_fn = seqs_fp.split('/')[-1].split('.')[0] + '_aligned.fa'
msa_fp = Path(seqs_fp).parent / msa_fn
if not Path(msa_fp).isfile():
    msa_fp = bs.align_fasta(seqs_fp, msa_fp, num_cpus=num_cpus)
# load multiple sequence alignment
msa_data = bs.load_fasta(msa_fp, is_aligned=True)
muts_dir = out_dir / 'mutations'
if not muts_dir.isdir():
    muts_dir.mkdir()
# identify insertions
insertions = bm.identify_insertions(msa_data, 
                                    meta_fp=None, 
                                    data_src='jordan',
                                    patient_zero=patient_zero, 
                                    min_ins_len=1)
Example #27
num_samples_missing_coverage = ans[
    ans['percent_coverage_cds'].isna()].shape[0]
# compute number of samples below 90% coverage
low_coverage_samples = ans[ans["percent_coverage_cds"] < 90]
# ignore samples below 90% coverage
ans = ans[ans["percent_coverage_cds"] >= 90]
# generate concatenated consensus sequences
if not dry_run:
    # Transfer files
    transfer_files(ans, out_dir, include_bams=include_bams, ncpus=num_cpus)
    msa_dir = out_dir / 'msa'
    if not msa_dir.isdir():
        msa_dir.mkdir()
    seqs_dir = Path(out_dir / 'fa')
    copy(ref_path, seqs_dir)
    seqs_fp = concat_fasta(seqs_dir, msa_dir / out_dir.basename())
    # load concatenated sequences
    cns_seqs = SeqIO.parse(msa_dir / (out_dir.basename() + '.fa'), 'fasta')
    cns_seqs = list(cns_seqs)
    # generate files containing metadata for Github, GISAID, GenBank
    create_github_meta(ans, released_samples_fpath, git_meta_cols)
    create_gisaid_meta(ans, gisaid_meta_cols)
    assemble_genbank_release(cns_seqs, ans, genbank_meta_cols,
                             out_dir / 'genbank')
    sra_dir = out_dir / 'sra'
    if not sra_dir.isdir():
        sra_dir.mkdir()
    input(
        f"\n Have you received the BioSample.txt files and placed them inside {sra_dir}? \n Press Enter to continue..."
    )
    create_sra_meta(ans, sra_dir)
Example #28
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from path import Path

ercc = {}
for cf in snakemake.input.counts:
    cf = Path(cf)
    name = cf.basename().replace('.count', '')
    if cf.getsize() == 0:
        ercc[name] = pd.Series()
    else:
        try:
            c = pd.read_csv(cf, sep="\t", index_col=0, header=None)[1]
            ercc[name] = c
        except:
            print("Error reading %s" % cf)
            exit(-1)

ercc = pd.DataFrame(ercc).fillna(0)
try:
    ercc = ercc.astype(int)
except:
    print(ercc.iloc[:5,:5])
    print(ercc.iloc[-5:,-5:])
    raise
ercc.index.name = 'ercc'
ercc_rpm = 1e6 * (ercc / ercc.sum())


groups = pd.read_csv('group.tsv', sep="\t", index_col=0, names=['group'])
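Note: collecting one pandas Series per sample in a dict and wrapping it in a DataFrame aligns all indexes, so fillna(0) stands in for spike-ins a sample never saw; a compact, self-contained sketch of the idea (sample names hypothetical):

import pandas as pd

counts = {'s1': pd.Series({'ERCC-00002': 5, 'ERCC-00003': 2}),
          's2': pd.Series({'ERCC-00003': 7})}
ercc = pd.DataFrame(counts).fillna(0).astype(int)
ercc_rpm = 1e6 * (ercc / ercc.sum())   # per-sample reads-per-million, as above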
Example #29
#!/usr/bin/python

import sys
from PIL import Image, ImageFilter
from path import Path

# Take the path to the image file as an argument
imgPath = Path(sys.argv[1])
print(imgPath, imgPath.basename())

# We first apply the CONTOUR filter to get the contours
# Then convert the image to black n white
# Traverse the pixels to get max and min positions of Black
img = Image.open(imgPath)
contourImg = img.filter(ImageFilter.CONTOUR)
binaryImg = contourImg.convert("1")
rows, cols = binaryImg.size

imgData = binaryImg.load()
edgeDataX = []
edgeDataY = []

print "Image size = (%d,%d)" % (rows, cols)

for x in range(rows):
    for y in range(cols):
        if imgData[x, y] == 0:
            edgeDataX.append(x)
            edgeDataY.append(y)

print "\nEdge Points of cropped image: "
Example #32
File: job.py Project: mfiers/kea3
    def get_template(self):
        """
        Find, copy and load the template
        """
        if '~' in self.template:
            self.template = str(Path(self.template).expanduser())

        if Path(self.template).isdir():
            if self.transient:
                lg.error("must specify a template name or file, not a dir")
                lg.error("when in transient mode")
                exit(-1)

            k3dir = Path(self.template) / 'k3'
            subdirs = k3dir.dirs() if k3dir.exists() else []
            if len(subdirs) == 0:
                lg.warning("No template found")
                exit(-1)
            if len(subdirs) > 1:
                lg.error("Multiple templates found: %s",
                         ", ".join([x.basename() for x in subdirs]))
                lg.error("Please specify one")
                exit()
            self.name = subdirs[0].basename()
            lg.info('template "%s" found in  %s', self.name, self.template)

            if not self.template == '.':
                # not pointing at the current folder
                # copy the template (& argument file)
                template_file = Path(self.template) / 'k3' / \
                    self.name / 'template.k3'

                if template_file != self.template_file:
                    template_file.copy(self.template_file)

        elif not self.template.endswith('.k3') and \
                re.match(r'[A-Za-z_]\w*', self.template):
            if (Path('k3') / self.template / 'template.k3').exists():
                if self.transient:
                    lg.error("Not possible to use k3/template")
                    lg.error("when in transient mode")
                    exit(-1)

                # the template is present
                self.name = self.template
            else:
                template_file = Path(self.app.conf['default_template_dir'])\
                    .expanduser() / ('%s.k3' % self.template)
                if not template_file.exists():
                    lg.error("Cannot find template")
                    exit(-1)
                self.name = self.template
                self.retrieve_template_file(template_file)

        elif Path(self.template).exists() and self.template.endswith('.k3'):
            # template points to a file - get it
            lg.debug("Found template file: %s", self.template)
            template_file = Path(self.template)
            self.name = template_file.basename().replace('.k3', '')
            self.retrieve_template_file(template_file)
        else:
            raise NotImplementedError("Need other source for templates: %s" %
                                      self.template)

        lg.debug("Template name is: %s", self.name)
        if not self.template_file.exists():
            lg.error("No valid template found in %s", self.workdir)
            exit(-1)

        lg.debug("Found template: %s", self.name)

        self.ctx['template']['name'] = self.name
Example #34
def plotMem(
        massifFile,
        filter,  # filter by timer name
        minTimeDiff,  # filter by difference in beginning and end
        minMemDiff,  # filter by change in memory usage
        shortTimers,  # exclude short timers
        memUnit='MB',  # unit for memory
        displayTimers=True):

    # first parse the log file valgrind created for us
    data = msparser.parse_file(massifFile)
    massifFile = Path(massifFile)
    cmd = data['cmd']
    timeUnit = data['time_unit']
    snapshots = data['snapshots']
    times = []
    memHeap = []
    for s in snapshots:
        try:
            times.append(s['time'])
            memHeap.append(formatMem(s['mem_heap'], memUnit))
        except:
            pass

    # now parse all the snapshot pairs we took in the timers
    # (We compile MueLu with MueLu_TIMEMONITOR_MASSIF_SNAPSHOTS=1 )
    snapshotPairs = []
    for f in Path('.').glob(massifFile + "*start.out"):
        fEnd = f.replace('start.out', 'stop.out')
        label = Path(f).basename().stripext().replace('.start', '')
        label = label.replace(massifFile.basename() + '.', '')
        try:
            label, counter = label.rsplit('.', 1)
        except:
            pass
        try:
            data = msparser.parse_file(f)
            dataEnd = msparser.parse_file(fEnd)
            assert data['time_unit'] == timeUnit
            assert dataEnd['time_unit'] == timeUnit
            data = data['snapshots']
            dataEnd = dataEnd['snapshots']
            assert (len(data)) == 1
            assert (len(dataEnd)) == 1
            assert data[0]['time'] <= dataEnd[0]['time'], f
            data[0]['label'] = label
            data[0]['counter'] = counter
            data[0]['mem_heap'] = formatMem(data[0]['mem_heap'], memUnit)
            dataEnd[0]['mem_heap'] = formatMem(dataEnd[0]['mem_heap'], memUnit)

            times.append(data[0]['time'])
            times.append(dataEnd[0]['time'])
            memHeap.append(data[0]['mem_heap'])
            memHeap.append(dataEnd[0]['mem_heap'])

            snapshotPairs += [(data[0], dataEnd[0])]
        except FileNotFoundError:
            print(f)

    # sort the snapshots
    times = np.array(times)
    memHeap = np.array(memHeap)
    idx = np.argsort(times)
    print('maximum heap memory usage: {}'.format(memHeap.max()))
    times = times[idx]
    memHeap = memHeap[idx]

    times = times[memHeap > minMemDiff]
    memHeap = memHeap[memHeap > minMemDiff]
    assert (len(times) > 0)

    # plot the curve of memory usage
    plt.plot(times, memHeap)

    if displayTimers:
        # now, filter through the snapshot pairs
        # otherwise, the plot becomes very messy
        filter = re.compile(filter)

        told = (-2 * minTimeDiff, -2 * minTimeDiff)
        snapshotPairsNew = []
        for i, pair in enumerate(
                sorted(snapshotPairs, key=lambda x: x[0]['time'])):
            if (filter.search(pair[0]['label'])
                    and abs(pair[0]['mem_heap'] - pair[1]['mem_heap']) >
                    minMemDiff):
                t = [pair[0]['time'], pair[1]['time']]
                if (abs(t[0] - told[0]) < minTimeDiff
                        and abs(t[1] - told[1]) < minTimeDiff):
                    print('Timers "{}" and "{}" seems to coincide'.format(
                        nameold, pair[0]['label']))
                    continue
                if (t[1] - t[0] < shortTimers):
                    continue
                told = t
                nameold = pair[0]['label']
                snapshotPairsNew.append(pair)
        snapshotPairs = snapshotPairsNew

        # stack the snapshot pairs
        height = max(memHeap) / len(snapshotPairs)
        for i, pair in enumerate(
                sorted(snapshotPairs, key=lambda x: x[0]['time'])):
            plt.gca().add_patch(
                patches.Rectangle((pair[0]['time'], i * height),
                                  pair[1]['time'] - pair[0]['time'],
                                  height,
                                  alpha=0.5,
                                  facecolor='red'))
            plt.text(pair[0]['time'], (i + 0.5) * height,
                     tex_escape(pair[0]['label']))
            # add vertical lines at start and end for each timer
            plt.plot([pair[0]['time'], pair[0]['time']], [0, max(memHeap)],
                     '-',
                     c='grey',
                     alpha=0.5)
            plt.plot([pair[1]['time'], pair[1]['time']], [0, max(memHeap)],
                     '-',
                     c='grey',
                     alpha=0.5)
            # add circles on these lines for memory usage at beginning and end
            plt.scatter([pair[0]['time'], pair[1]['time']],
                        [pair[0]['mem_heap'], pair[1]['mem_heap']],
                        c='r')
    plt.xlabel(timeUnit)
    plt.ylabel(memUnit)
    plt.title(tex_escape(cmd))
Example #35
import os

from path import Path

##################################################################
# Application configuration
##################################################################

PROJECT_DIR = Path(__file__).abspath().realpath().dirname().parent
PROJECT_NAME = PROJECT_DIR.basename()

ROOT_URLCONF = 'mysite_backend.urls'

WSGI_APPLICATION = 'mysite_backend.wsgi.application'
HOST = os.environ.get('BACKEND_SERVER_URL', 'http://localhost:8000')

DEBUG = True

##################################################################
# Language and timezone
##################################################################

# https://en.wikipedia.org/wiki/List_of_tz_zones_by_name
TIME_ZONE = 'Asia/Taipei'

LANGUAGE_CODE = 'zh-hant'

USE_TZ = False
USE_I18N = True
USE_L10N = True
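Note: PROJECT_DIR chains abspath().realpath().dirname().parent, i.e. it resolves this settings file and climbs two directory levels; a hedged stdlib equivalent for comparison:

import pathlib

PROJECT_DIR = pathlib.Path(__file__).resolve().parent.parent
PROJECT_NAME = PROJECT_DIR.name   # pathlib's counterpart to basename()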