Example #1
0
def _project_ftp(objectId):
    """Point the project's FTP user home directory at the project root.

    Runs ``pure-pw usermod`` inside the FTP service container so that the
    FTP user named after ``objectId`` lands in the project directory.

    Returns:
        flask response with the project's root path.

    Raises:
        Exception: if the Docker call fails or the command exits non-zero.
    """
    # Get project from server.
    Project = Object.factory('Project')
    project = Project.Query.get(objectId=objectId)

    config = Config.get()
    ftp_path = config['ftpPath']
    project_dir = config['projectDir']
    ftp_project_path = os.path.join(ftp_path, project_dir, objectId)

    # Change ftp home directory to the project root.
    cmd = (
        'pure-pw usermod {} -d {} -m -f /etc/pure-ftpd/passwd/pureftpd.passwd'
    ).format(objectId, ftp_project_path)
    try:
        # Compose-style container name: <repo>_<service>_1.
        ftp_name = config['repoName'] + '_' + config['ftpService'] + '_1'
        client = docker.from_env()
        ftp = client.containers.get(ftp_name)

        # exec_run returns (exit_code, output); unpack for clarity.
        exit_code, _ = ftp.exec_run(cmd)

        if exit_code != 0:
            raise Exception('non-zero exit code on ftp user modification')
    except Exception:
        print('error occurred while modifying ftp user {}'.format(objectId),
              file=sys.stderr)
        # Bare raise preserves the original traceback (the old `raise e`
        # re-raised from this frame).
        raise

    return jsonify({'result': project.paths['root']})
Example #2
0
def _sample_initialize(projId, objectId, name):
    """Create a sample's data directory and link per-analysis folders.

    For every sample-type analysis the project has a directory for, a
    source folder named after the sample is created under the project's
    analysis path and symlinked into the sample's own directory.

    Args:
        projId: objectId of the owning project.
        objectId: objectId of the sample (used as its directory name).
        name: sample name (used as the per-analysis subdirectory name).

    Returns:
        flask response mapping analysis code -> source path.
    """
    # Get project from server.
    Project = Object.factory('Project')
    project = Project.Query.get(objectId=projId)

    config = Config.get()
    data_path = config['dataPath']
    sample_dir = config['sampleDir']
    sample_path = os.path.join(data_path, sample_dir, objectId)
    os.makedirs(sample_path, exist_ok=True)

    # Get analyses that apply to samples.
    sample_analyses = _get_analyses().filter(type='sample')

    paths = {}
    for analysis in sample_analyses:
        # Only link analyses the project actually has a directory for.
        if analysis.code in project.paths:
            source_path = os.path.join(project.paths[analysis.code], name)
            target_path = os.path.join(sample_path, analysis.code)

            os.makedirs(source_path, exist_ok=True)
            # Link relative to the target's parent so the tree stays
            # relocatable. (Removed a leftover debug print of these paths.)
            rel_path = os.path.relpath(source_path,
                                       os.path.dirname(target_path))

            os.symlink(rel_path, target_path, target_is_directory=True)

            paths[analysis.code] = source_path

    return jsonify({'result': {'paths': paths}})
Example #3
0
def _project_reads(objectId):
    """List read files under the project's read directory, grouped by the
    immediate subfolder they live in.

    Files directly at the read root are grouped under the key ``'/'``.
    Only files whose extension is in config ``readExtensions`` count.

    Returns:
        flask response mapping folder name -> list of {'path', 'size'}.
    """
    # Get project from server.
    Project = Object.factory('Project')
    project = Project.Query.get(objectId=objectId)

    # Hoist: build the suffix tuple once instead of per file.
    extensions = tuple(Config.get()['readExtensions'])

    read_root = project.paths['read']
    reads = {}
    for root, _, files in os.walk(read_root):
        for f in files:
            if f.endswith(extensions):
                # Group by the last path component relative to the read root.
                name = os.path.basename(root.replace(read_root, ''))

                # In case files are at the root.
                if name == '':
                    name = '/'

                path = os.path.join(root, f)
                size = os.path.getsize(path)
                # (Removed a leftover per-file debug print to stderr.)
                reads.setdefault(name, []).append({'path': path, 'size': size})

    return jsonify({'result': reads})
Example #4
0
def _project_email(objectId, subject, message):
    """
    Send mail with the given arguments.

    Renders an HTML body containing the message plus a standard footer
    (project id, unique URL, FTP credentials), then queues it by writing
    a JSON file into the configured email directory — presumably a
    separate mailer process picks these up (TODO confirm).
    """
    Project = Object.factory('Project')
    project = Project.Query.get(objectId=objectId)

    config = Config.get()
    host = config['host']
    data_path = config['dataPath']
    email_dir = config['emailDir']
    email_path = os.path.join(data_path, email_dir)

    # Server-local timestamp; also used below as the (unique) file name.
    datetime = dt.datetime.now()
    url = 'http://{}/?id={}'.format(host, objectId)
    fr = 'AlaskaProject_{}@{}'.format(objectId, host)
    to = project.email

    format_dict = {
        'message': message,
        'objectId': objectId,
        'url': url,
        'host': host,
        'password': project.ftpPassword,
        'to': to,
        # Timezone label is hard-coded; the timestamp itself is server-local.
        'datetime': datetime.strftime('%Y-%m-%d %H:%M:%S') + ' PDT'
    }

    # Footer that is appended to every email.
    # Backslash continuations keep this one logical string; the literal
    # leading spaces end up inside the HTML, where renderers ignore them.
    full_message = '\
    <html> \
        <head></head> \
        <body> \
         <p>{message}</p> \
         <br> \
         <hr> \
         <p>Project ID: {objectId}<br> \
         Unique URL: <a href="{url}">{url}</a><br> \
         FTP server: {host}<br> \
         FTP port: 21<br> \
         FTP username: {objectId}<br> \
         FTP password: {password}<br> \
         This message was sent to {to} at {datetime}.<br> \
         <b>Please do not reply to this email.</b></p> \
        </body> \
    </html> \
    '.format(**format_dict)

    email = {'to': to, 'from': fr, 'subject': subject, 'message': full_message}

    # str(datetime) includes microseconds, so file names are effectively
    # unique per call.
    email_file = '{}.json'.format(datetime)
    output_path = os.path.join(email_path, email_file)

    with open(output_path, 'w') as f:
        json.dump(email, f, indent=4)

    return jsonify({'result': email_file})
Example #5
0
def _referenceBuild(reference):
    '''
    Helper function that blocks until the given reference is built.

    Launches the index-builder container synchronously (detach=False) and
    returns once it exits. No-op if the reference is already built.
    '''
    # Make sure the index hasn't been built yet.
    if reference.ready:
        return

    config = Config.get()
    index_image = config['indexImage']
    # Compose-style resource names are prefixed with the repo name.
    data_volume = config['repoName'] + '_' + config['dataVolume']
    data_path = config['dataPath']
    script_volume = config['repoName'] + '_' + config['scriptVolume']
    script_path = config['scriptPath']
    script = config['indexScript']
    network = config['repoName'] + '_' + config['backendNetworkName']
    cpus = config['cpus']

    # begin container variables
    cmd = 'python3 {} {}'.format(script, reference.objectId)
    volumes = {
        data_volume: {
            'bind': data_path,
            'mode': 'rw'
        },
        script_volume: {
            'bind': script_path,
            'mode': 'rw'
        }
    }
    environment = {
        'PARSE_HOSTNAME': PARSE_HOSTNAME,
        'PARSE_APP_ID': PARSE_APP_ID,
        'PARSE_MASTER_KEY': PARSE_MASTER_KEY,
        'SENTRY_DSN': os.getenv('SENTRY_INDEX_DSN', ''),
        'ENVIRONMENT': os.getenv('ENVIRONMENT', 'default')
    }
    wdir = script_path
    name = 'index-{}'.format(reference.objectId)

    print(cmd, volumes, wdir, file=sys.stderr)

    # Docker client. With detach=False the call blocks until the container
    # exits and returns its logs; the original stored that in a dead local
    # (no `global`) and immediately overwrote it with None, so the return
    # value is simply discarded here.
    client = docker.from_env()
    client.containers.run(index_image,
                          cmd,
                          detach=False,
                          stderr=True,
                          auto_remove=True,
                          volumes=volumes,
                          working_dir=wdir,
                          cpuset_cpus=cpus,
                          network=network,
                          environment=environment,
                          name=name)
Example #6
0
def _project_initialize(objectId):
    """Create the directory structure and FTP user for a new project.

    Makes the project root/read directories, drops an UPLOAD_HERE marker
    file, and creates a pure-ftpd user (named after the project) inside
    the FTP container whose home is the project's read directory.

    Returns:
        flask response with the created paths and generated FTP password,
        or an error if the project root already exists.
    """
    config = Config.get()
    data_path = config['dataPath']
    project_dir = config['projectDir']
    read_dir = config['readDir']
    ftp_path = config['ftpPath']

    # Make directories.
    root_path = os.path.join(data_path, project_dir, objectId)
    read_path = os.path.join(root_path, read_dir)
    ftp_project_path = os.path.join(ftp_path, project_dir, objectId)
    ftp_read_path = os.path.join(ftp_project_path, read_dir)
    paths = {'root': root_path, 'read': read_path}

    # Make sure this is actually a new project.
    if os.path.exists(root_path):
        return jsonify({'error': 'root folder exists'})

    for path in paths.values():
        os.makedirs(path, exist_ok=True)

    # Make UPLOAD_HERE file so users can see where reads belong.
    upload_here = os.path.join(read_path, 'UPLOAD_HERE')
    with open(upload_here, 'w') as f:
        f.write('')

    # Make ftp user.
    # Generate random password
    passwd = _generate_password(5)

    # begin container variables
    # NOTE(review): passwd/objectId are interpolated into a shell command;
    # both are server-generated here, but shell-quoting them would be safer
    # if that ever changes.
    cmd = (
        '/bin/bash -c "chmod -R 0777 {} && (echo {}; echo {}) | pure-pw useradd {} -m -f /etc/pure-ftpd/passwd/pureftpd.passwd '
        + '-u ftpuser -d {}"').format(ftp_project_path, passwd, passwd,
                                      objectId, ftp_read_path)
    # Redact the password before logging the command (the original leaked
    # the cleartext FTP password into stderr logs).
    print(cmd.replace(passwd, '****'), file=sys.stderr)

    try:
        ftp_name = config['repoName'] + '_' + config['ftpService'] + '_1'
        client = docker.from_env()
        ftp = client.containers.get(ftp_name)

        # run command; exec_run returns (exit_code, output).
        exit_code, _ = ftp.exec_run(cmd)

        if exit_code != 0:
            raise Exception('non-zero exit code on ftp user creation')
    except Exception:
        print('error occurred while making ftp user {}'.format(objectId),
              file=sys.stderr)
        # Bare raise preserves the original traceback.
        raise

    return jsonify({'result': {'paths': paths, 'ftpPassword': passwd}})
Example #7
0
def _project_sleuth(objectId, port):
    """Launch a detached Shiny/sleuth container serving this project's
    sleuth object, publishing it on the given host port.

    Returns:
        flask response with the new container's id and name.
    """
    # Fetch the project record.
    Project = Object.factory('Project')
    project = Project.Query.get(objectId=objectId)

    # Check if there is a sleuth container open for this project.
    config = Config.get()
    repo = config['repoName']
    data_volume = repo + '_' + config['dataVolume']
    script_volume = repo + '_' + config['scriptVolume']
    network = repo + '_' + config['backendNetworkName']
    data_path = config['dataPath']
    script_path = config['scriptPath']
    so_path = project.files[config['diffDir']]['sleuth']

    # Command that serves the sleuth object via the Shiny script.
    cmd = 'Rscript {} -p {} --alaska'.format(config['shinyScript'], so_path)

    volumes = {
        data_volume: {'bind': data_path, 'mode': 'rw'},
        script_volume: {'bind': script_path, 'mode': 'rw'},
    }
    environment = {
        'PARSE_HOSTNAME': PARSE_HOSTNAME,
        'PARSE_APP_ID': PARSE_APP_ID,
        'PARSE_MASTER_KEY': PARSE_MASTER_KEY,
    }
    name = 'shiny-{}'.format(project.objectId)

    # Start a detached, self-removing container on the backend network,
    # mapping the internal Shiny port 42427 onto the requested host port.
    container = docker.from_env().containers.run(
        config['diffImage'],
        cmd,
        detach=True,
        auto_remove=True,
        volumes=volumes,
        working_dir=script_path,
        network=network,
        environment=environment,
        name=name,
        ports={42427: port})

    return jsonify(
        {'result': {
            'containerId': container.id,
            'containerName': name
        }})
Example #8
0
def upload(project, host, username, password, fname):
    """Upload the project's GEO archive to the GEO FTP server.

    Args:
        project: project object; its ``files['geo']`` is the local archive.
        host, username, password: FTP credentials.
        fname: remote file name to store the archive under.

    Raises:
        Exception: wrapping any FTP/IO failure, with the original error
            chained as the cause.
    """
    print('uploading project {}'.format(project.objectId))
    archive_path = project.files['geo']
    geo_dir = Config.get()['geoDir']

    # Open a new FTP connection.
    try:
        with ftplib.FTP(host, username, password) as conn:
            conn.cwd(geo_dir)

            with open(archive_path, 'rb') as f:
                conn.storbinary('STOR {}'.format(fname), f)
    except Exception as e:
        # Chain the cause so the underlying FTP error isn't lost (the
        # original discarded `e` entirely).
        raise Exception('error occurred while uploading {}'.format(
            project.objectId)) from e
Example #9
0
def _sample_citation(objectId):
    """Build citation sentences describing how this sample was analyzed.

    Returns:
        flask response with a list of sentences naming tool versions
        (taken from config) and the kallisto flags used for this sample.
    """
    # Get sample from server.
    Sample = Object.factory('Sample')
    sample = Sample.Query.get(objectId=objectId)

    config = Config.get()

    genus = sample.reference.organism.genus
    species = sample.reference.organism.species
    ref_version = sample.reference.version

    # Kallisto flags; single-end reads need explicit fragment statistics.
    arg = '-b {} --bias'.format(config['kallistoBootstraps'])

    if sample.readType == 'single':
        arg += ' --single -l {} -s {}'.format(sample.readLength,
                                              sample.readStd)

    format_dict = {
        'genus': genus,
        'species': species,
        'ref_version': ref_version,
        'arg': arg,
        **config
    }

    info = [
        'RNA-seq data was analyzed with the Alaska pipeline (alaska.caltech.edu).',
        # Fixed duplicated word ("using using") in the original sentence.
        ('Quality control was performed using Bowtie2 (v{versionBowtie}), '
         'Samtools (v{versionSamtools}), RSeQC (v{versionRseqc}), '
         'FastQC (v{versionFastqc}), with results aggregated with '
         'MultiQC (v{versionMultiqc}).').format(**format_dict),
        ('Reads were aligned to the {genus} {species} genome version {ref_version} '
         'as provided by Wormbase using Kallisto (v{versionKallisto}) with the following '
         'flags: {arg}').format(**format_dict),
        ('Differential expression analyses with Sleuth (v{versionSleuth}) '
         'were performed using a Wald Test corrected for multiple-testing.'
         ).format(**format_dict)
    ]

    # WormBase enrichment only exists for C. elegans.
    if genus == 'caenorhabditis' and species == 'elegans':
        info.append(
            'Enrichment analysis was performed using the Wormbase Enrichment Suite.'
        )

    return jsonify({'result': info})
Example #10
0
def check_images():
    """Verify that every Docker image needed by active analyses (plus the
    index-builder image) is present locally; exit(1) on the first miss."""
    index_image = Config.get()['indexImage']

    # Images of all active analyses, plus the index image.
    Analysis = Object.factory('Analysis')
    images = [analysis.image for analysis in Analysis.Query.filter(active=True)]
    images.append(index_image)

    client = docker.from_env()

    for image in images:
        print(image, flush=True)
        try:
            client.images.get(image)
        except Exception as e:
            # Report to Sentry, then abort the whole process.
            capture_exception(e)
            print('error while checking image {}'.format(image))
            sys.exit(1)
Example #11
0
def _project_upload(project, host, username, password, geo_username):
    """Upload a compiled project to the GEO FTP server, emailing the user
    at start, on success, and on failure.

    On success the project's progress becomes 'uploaded'; on failure it is
    rolled back to 'compiled'. Runs inside an explicit app context, so it
    is presumably called from a background thread — TODO confirm.
    """
    objectId = project.objectId
    with app.app_context():
        try:
            _project_email(
                objectId, 'Submission started for project {}'.format(objectId),
                ('Alaska has started submitting project {} to the GEO. '
                 'You may view the progress of your upload through the '
                 'public GEO FTP.').format(objectId))

            # Remote archive name expected by the GEO submission process.
            file = '{}_files.tar.gz'.format(geo_username)
            # Tag the Sentry scope so any error is attributed to this upload.
            with configure_scope() as scope:
                scope.set_tag('upload', objectId)
                upload(project, host, username, password, file)

            # Once done, update progress.
            project.progress = 'uploaded'
            project.save()

            _project_email(
                objectId,
                'Submission finished for project {}'.format(objectId),
                ('Alaska has finished submission of project {} to the GEO.<br>'
                 'Please fill out this form: <a href="mailto:{}">GEO submission form</a> '
                 'with the following information:<br>'
                 '1) Select <i>Notify GEO about your FTP file transfer</i><br>'
                 '2) Select <i>Yes, all my data have finished transferring</i><br>'
                 '3) The name of the uploaded file is: <strong>{}</strong><br>'
                 '4) Select <i>New</i> as the submission kind.<br>'
                 '5) Select your preferred release date.<br>'
                 'Failure to submit this form may result in the removal '
                 'of your data!').format(objectId,
                                         Config.get()['geoForm'], file))
        except Exception as e:
            # Roll back so the project can be re-uploaded later.
            project.progress = 'compiled'
            project.save()
            _project_email(objectId, 'Upload failed for project {}'.format(
                objectId
            ), ('Alaska encountered an error while uploading project {} to the GEO.'
                '<br>{}<br>'
                'Please submit an issue on Github if '
                'this keeps happening.').format(objectId, str(e)))
Example #12
0
def send_reset_email():
    """Queue a password-reset verification email.

    Generates a cryptographically secure 24-digit key, records it in the
    module-level ``reset`` map, and writes the verification email as a
    JSON file into the configured email directory.

    Returns:
        flask response with the queued email file name.
    """
    # Local import keeps the module's import list untouched; secrets is
    # stdlib (3.6+).
    import secrets

    data = request.get_json()
    to = data['email']
    fr = '*****@*****.**'
    datetime = dt.datetime.now()

    config = Config.get()
    host = config['host']
    data_path = config['dataPath']
    email_dir = config['emailDir']
    email_path = os.path.join(data_path, email_dir)

    # Use secrets (not random): this key authorizes a password reset, so
    # it must not come from a predictable PRNG.
    key = ''.join(secrets.choice(string.digits) for _ in range(24))
    reset[key] = to

    url = 'http://{}/webhook/reset/verify/{}'.format(host, key)

    subject = 'Password reset verification for Alaska'
    message = (
        '<html><head></head><body>'
        'Please click on the following link to complete password reset.<br>'
        '<a href="{}">{}</a><br>'
        'If you did not make this request, please do not click on the link.<br>'
        'This message was sent to {} at {}.<br>'
        '<b>Please do not reply to this email.</b></body>').format(
            url, url, to, datetime.strftime('%Y-%m-%d %H:%M:%S'))

    email = {'to': to, 'from': fr, 'subject': subject, 'message': message}

    # str(datetime) includes microseconds, so file names are unique per call.
    email_file = '{}.json'.format(datetime)
    output_path = os.path.join(email_path, email_file)

    with open(output_path, 'w') as f:
        json.dump(email, f, indent=4)

    return jsonify({'result': email_file})
Example #13
0
def reset_notify():
    """Queue a notification email confirming a completed password reset.

    Reads the recipient from the request JSON and writes the email as a
    JSON file into the configured email directory.

    Returns:
        flask response with the queued email file name.
    """
    data = request.get_json()
    to = data['email']
    fr = '*****@*****.**'
    # Second-resolution timestamp doubles as the file name.
    datetime = dt.datetime.now().strftime('%Y%m%d_%H%M%S')

    config = Config.get()
    # (Removed unused `host` config read present in the original.)
    data_path = config['dataPath']
    email_dir = config['emailDir']
    email_path = os.path.join(data_path, email_dir)

    subject = 'Password reset for Alaska'
    message = 'Your password for Alaska has been reset.'

    email = {'to': to, 'from': fr, 'subject': subject, 'message': message}

    email_file = '{}.json'.format(datetime)
    output_path = os.path.join(email_path, email_file)

    with open(output_path, 'w') as f:
        json.dump(email, f, indent=4)

    return jsonify({'result': email_file})
Example #14
0
def run_post(project, code='post', requires='diff'):
    """Run post (enrichment) analysis for a project.

    Reads the differential-expression beta CSVs produced by the `requires`
    step, runs WormBase TEA enrichment for each configured analysis type,
    writes results/plots into the project's `code` path, then archives the
    output and records the archive path on the project.

    Only supported for C. elegans (WormBase tooling); no-op otherwise.
    """
    print_with_flush('# starting post for project {}'.format(project.objectId))

    # Assumes all samples share one organism; checks the first sample only.
    organism = project.relation('samples').query()[0].reference.organism
    if organism.genus != 'caenorhabditis' or organism.species != 'elegans':
        print_with_flush('# Currently, post analysis is only supported for '
                         'C. elegans')
        return

    config = Config.get()
    q_threshold = config['qThreshold']
    tea_types = config['teaTypes']

    diff_path = project.paths[requires]
    post_path = project.paths[code]

    for file in os.listdir(diff_path):
        file_name = os.path.splitext(os.path.basename(file))[0]
        file_path = os.path.join(diff_path, file)

        # Only the per-condition beta tables are of interest.
        if file.startswith('betas') and file.endswith('.csv'):
            df = pd.read_csv(file_path, index_col=0)
            # Genes significant at the configured q-value threshold.
            gene_list = df[df.qval < q_threshold].ens_gene

            # Skip if gene list is empty.
            if len(gene_list) == 0:
                print_with_flush(
                    ('# there are no genes with q < {} in ' + '{}!').format(
                        q_threshold, file))
                print_with_flush('# this means there are no significantly ' +
                                 'differentially-expressed genes for ' +
                                 'this set of conditions.')
                continue

            # One enrichment run per configured TEA analysis type.
            for tea_type in tea_types:
                tea_file = '{}_{}'.format(
                    file_name.replace('betas_wt', 'enrichment'), tea_type)
                # Used both as plot title and as output file stem.
                tea_title = os.path.join(post_path, tea_file)
                print_with_flush(
                    ('# performing {} enrichment analysis ' + 'for {}').format(
                        tea_type, file))
                df_dict = tea.fetch_dictionary(tea_type)
                df_results = tea.enrichment_analysis(gene_list,
                                                     df_dict,
                                                     aname=tea_title + '.csv',
                                                     save=True,
                                                     show=False)
                tea.plot_enrichment_results(df_results,
                                            analysis=tea_type,
                                            title=tea_title,
                                            save=True)

    # Archive.
    archive_path = archive(project, code)

    if code not in project.files:
        project.files[code] = {}
    project.files[code]['archive'] = archive_path
    project.save()

    print_with_flush('# done')
Example #15
0
def wait():
    """Sleep for the worker polling interval configured on the server."""
    time.sleep(Config.get()['workerInterval'])
Example #16
0
def start():
    """Main worker loop: dequeue jobs and run their analysis containers.

    Runs forever. For each job it ensures the project (and, for
    sample-type analyses, each sample) has an output directory, launches
    the analysis Docker container, streams its logs into a per-analysis
    output file, and reports success or failure back to the server via
    cloud functions. The running container is published through the
    module-level ``container`` global (presumably so a signal handler can
    stop it — TODO confirm).
    """
    global container

    while True:
        # Dequeue job.
        job = dequeue()

        if job:
            try:
                project = job.project
                analysis = job.analysis
                print('Retrieved job {} for project {}'.format(
                    job.objectId, project.objectId),
                      flush=True)

                # Make directory if it doesn't exist.
                if analysis.code not in project.paths:
                    path = os.path.join(project.paths['root'], analysis.code)
                    os.makedirs(path, exist_ok=True)

                    project.paths[analysis.code] = path
                    project.save()
                # Also for each sample, if it needs one.
                if analysis.type == 'sample':
                    samples = project.relation('samples').query()

                    for sample in samples:
                        if analysis.code not in sample.paths:
                            path = os.path.join(project.paths[analysis.code],
                                                sample.name)
                            os.makedirs(path, exist_ok=True)

                            sample.paths[analysis.code] = path
                            sample.save()

                config = Config.get()
                # Compose-style resource names are prefixed with repo name.
                data_volume = config['repoName'] + '_' + config['dataVolume']
                data_path = config['dataPath']
                script_volume = config['repoName'] + '_' + config[
                    'scriptVolume']
                script_path = config['scriptPath']
                network = config['repoName'] + '_' + config[
                    'backendNetworkName']
                cpus = config['cpus']

                # begin container variables.
                # -u: unbuffered Python output so log streaming is live.
                cmd = 'python3 -u {} {} {}'.format(analysis.script,
                                                   project.objectId,
                                                   analysis.code)
                # Optional extra args: prerequisite analysis code, archiving.
                if getattr(analysis, 'requires', None) is not None:
                    cmd += ' ' + analysis.requires.code
                if job.archive:
                    cmd += ' --archive'

                volumes = {
                    data_volume: {
                        'bind': data_path,
                        'mode': 'rw'
                    },
                    script_volume: {
                        'bind': script_path,
                        'mode': 'rw'
                    }
                }
                environment = {
                    'PARSE_HOSTNAME': PARSE_HOSTNAME,
                    'PARSE_APP_ID': PARSE_APP_ID,
                    'PARSE_MASTER_KEY': PARSE_MASTER_KEY,
                    'ENVIRONMENT': os.getenv('ENVIRONMENT', 'default'),
                    'SENTRY_QC_DSN': os.getenv('SENTRY_QC_DSN', ''),
                    'SENTRY_QUANT_DSN': os.getenv('SENTRY_QUANT_DSN', ''),
                    'SENTRY_DIFF_DSN': os.getenv('SENTRY_DIFF_DSN', ''),
                    'SENTRY_POST_DSN': os.getenv('SENTRY_POST_DSN', '')
                }
                wdir = script_path
                name = '{}-{}'.format(analysis.code, project.objectId)

                # output path.
                output_file = '{}_output.txt'.format(analysis.code)
                output_path = os.path.join(project.paths[analysis.code],
                                           output_file)
                job.outputPath = output_path
                # Wall-clock start, reported as runtime on success.
                start = time.time()
                job.save()

                # Remove output file if it already exists.
                if os.path.exists(output_path):
                    os.remove(output_path)

                # Record the 'started' progress stage for this analysis,
                # if one is configured.
                progress = config['progress']
                key = analysis.code + '_started'
                if key in progress:
                    project.oldProgress = progress[key]
                    project.save()

                # Docker client.
                client = docker.from_env()
                container = client.containers.run(analysis.image,
                                                  cmd,
                                                  detach=True,
                                                  auto_remove=True,
                                                  volumes=volumes,
                                                  working_dir=wdir,
                                                  cpuset_cpus=cpus,
                                                  network=network,
                                                  environment=environment,
                                                  name=name)
                print('started container with id {} and name {}'.format(
                    container.id, name))
                # Stream logs line-by-line while the container runs.
                hook = container.logs(stdout=True, stderr=True, stream=True)
                for line in hook:
                    # Strip non-ascii bytes that would break downstream
                    # consumers of the output file.
                    decoded = line.decode('utf-8').strip().encode(
                        'ascii', 'ignore').decode('ascii')

                    if '\n' in decoded:
                        outs = decoded.split('\n')
                    else:
                        outs = [decoded]

                    for out in outs:
                        # Detect commands.
                        # Lines starting with '##' are command echoes,
                        # recorded on the job.
                        if out.startswith('##'):
                            job.commands.append(out.strip('# '))
                            job.save()

                        # Save output.
                        print(out, flush=True)
                        with open(output_path, 'a') as f:
                            f.write(out + '\n')

                # Container finished.
                exitcode = container.wait()['StatusCode']
                runtime = time.time() - start

                if exitcode != 0:
                    # Grab the full log to include in the error message.
                    log = container.attach(stdout=True,
                                           stderr=True,
                                           stream=False,
                                           logs=True)
                    msg = 'container {} exited with code {}\n{}'.format(
                        name, exitcode, log)
                    raise Exception(msg)
                else:
                    print('{} success'.format(container.name))
                    # Report success (with runtime) back to the server.
                    Function('jobSuccess')(objectId=job.objectId,
                                           runtime=runtime)
                    continue
            except Exception as e:
                capture_exception(e)
                print(traceback.format_exc(), file=sys.stderr, flush=True)

                # Notify that there was an error.
                Function('jobError')(objectId=job.objectId)

            finally:
                # Clear the global so handlers don't see a stale container.
                container = None

        # Wait.
        wait()
Example #17
0
if __name__ == '__main__':
    import argparse

    # CLI: takes the objectId of the reference whose indices to build.
    parser = argparse.ArgumentParser(description='Build indices.')
    parser.add_argument('objectId',
                        type=str,
                        help='objectId of the reference for which to build ' +
                        'the index')
    args = parser.parse_args()
    objectId = args.objectId

    # Tag all Sentry events from this run with the reference id.
    with configure_scope() as scope:
        scope.user = {'id': objectId}

        # Get number of threads.
        config = Config.get()
        nthreads = config['threads']

        # Get reference object.
        Reference = Object.factory('Reference')
        reference = Reference.Query.get(objectId=objectId)

        # Build bowtie2 index.
        build_bowtie2(reference, nthreads)

        # Build kallisto index.
        build_kallisto(reference, nthreads)

        # Success. This reference is ready to be used.
        reference.ready = True
        reference.save()
Example #18
0
def _project_citation(objectId):
    """Write an alaska_info.txt citation file for a project and return its
    contents.

    Describes the full pipeline (QC, quantification, differential
    expression) with tool versions from config and per-sample kallisto
    flags, saves the text to the project root, and records the file path
    on the project.
    """
    # Get project from server.
    Project = Object.factory('Project')
    project = Project.Query.get(objectId=objectId)

    config = Config.get()
    citation_file = config['citationFile']
    citation_path = os.path.join(project.paths['root'], citation_file)

    samples = project.relation('samples').query()

    # Collect per-sample kallisto flags; genus/species/ref_version keep
    # the values from the last sample iterated (samples presumably share
    # one reference — TODO confirm).
    args = ''
    genus = ''
    species = ''
    ref_version = ''
    for sample in samples:
        genus = sample.reference.organism.genus
        species = sample.reference.organism.species
        ref_version = sample.reference.version

        arg = '-b {} --bias'.format(config['kallistoBootstraps'])

        # Single-end reads need explicit fragment-length statistics.
        if sample.readType == 'single':
            arg += ' --single -l {} -s {}'.format(sample.readLength,
                                                  sample.readStd)

        args += '{}({}):\t{}.\n'.format(sample.objectId, sample.name, arg)

    # Merge computed values with config so version placeholders like
    # {versionBowtie} resolve from config.
    format_dict = {
        'factor': str(len(project.factors)),
        'genus': genus.capitalize(),
        'species': species,
        'ref_version': ref_version,
        'args': args,
        'datetime': project.createdAt,
        'id': project.objectId,
        'n_samples': len(project.relation('samples').query()),
        **config
    }

    info = ('alaska_info.txt for {id}\n'
            'This project was created on {datetime} PST with '
            '{n_samples} samples.\n\n').format(**format_dict)

    info += (
        'RNA-seq data was analyzed with Alaska using the '
        '{factor}-factor design option.\nBriefly, Alaska '
        'performs quality control using\nBowtie2 (v{versionBowtie}), '
        'Samtools (v{versionSamtools}), RSeQC (v{versionRseqc}), '
        'FastQC (v{versionFastqc}) and outputs\n'
        'a summary report generated using MultiQC (v{versionMultiqc}). Read '
        'quantification and\ndifferential expression analysis of '
        'transcripts were performed using\nKallisto (v{versionKallisto}) '
        'and Sleuth (v{versionSleuth}), respectively. '
        'Kallisto (v{versionKallisto}) was run using the\nfollowing flags for each '
        'sample:\n{args}\n'
        'Reads were aligned using\n{genus} {species} genome '
        'version {ref_version}\nas provided by Wormbase.\n\n'
        'Differential expression analyses with Sleuth (v{versionSleuth}) were '
        'performed using a\nWald Test corrected for '
        'multiple-testing.\n\n').format(**format_dict)

    # Add more info if enrichment analysis was performed.
    if genus == 'caenorhabditis' and species == 'elegans':
        info += ('Enrichment analysis was performed using the WormBase '
                 'Enrichment Suite:\n'
                 'https://doi.org/10.1186/s12859-016-1229-9\n'
                 'https://www.wormbase.org/tools/enrichment/tea/tea.cgi\n')
    # if self.epistasis:
    #     info += ('Alaska performed epistasis analyses as first '
    #              'presented in\nhttps://doi.org/10.1073/pnas.1712387115\n')

    with open(citation_path, 'w') as f:
        f.write(info)

    # Record where the citation file lives so other endpoints can serve it.
    project.files['citation'] = citation_path
    project.save()

    return jsonify({'result': info})
Example #19
0
# Parse server credentials; defaults are development placeholders.
PARSE_APP_ID = os.getenv('PARSE_APP_ID', 'alaska')
PARSE_MASTER_KEY = os.getenv('PARSE_MASTER_KEY', 'MASTER_KEY')
# NOTE(review): PARSE_HOSTNAME is defined earlier in this file (not visible
# here); this print leaks the master key into startup logs.
print(PARSE_HOSTNAME, PARSE_APP_ID, PARSE_MASTER_KEY)

# Setup for parse_rest
os.environ["PARSE_API_ROOT"] = PARSE_HOSTNAME

from parse_rest.config import Config
from parse_rest.datatypes import Function, Object, GeoPoint
from parse_rest.connection import register, SessionToken
from parse_rest.query import QueryResourceDoesNotExist
from parse_rest.connection import ParseBatcher
from parse_rest.core import ResourceRequestBadRequest, ParseError
register(PARSE_APP_ID, '', master_key=PARSE_MASTER_KEY)

# Scripts live on a shared volume; make that path importable before
# importing the compile/upload helpers that live there.
sys.path.append(Config.get()['scriptPath'])
from compile import compile
from upload import upload

# Book-keeping for in-flight operations, keyed by project objectId.
compiling = {}
uploading = {}
# Currently-running index-build container, if any.
index_container = None


def sigterm_handler(signal, frame):
    print('SIGTERM received', file=sys.stderr, flush=True)
    print(compiling, uploading, file=sys.stderr, flush=True)

    Project = Object.factory('Project')

    for objectId, t in compiling.items():
Example #20
0
if __name__ == '__main__':
    # Command-line entry point: run QC for the project with the given
    # objectId, optionally archiving the project afterwards.
    import argparse

    parser = argparse.ArgumentParser(description='Perform qc.')
    parser.add_argument('objectId', type=str)
    # BUGFIX: `default` on a required positional is ignored by argparse;
    # nargs='?' makes `code` optional so the 'qc' default actually applies.
    # Callers that pass `code` explicitly are unaffected.
    parser.add_argument('code', type=str, nargs='?', default='qc')
    parser.add_argument('--archive', action='store_true')
    args = parser.parse_args()

    objectId = args.objectId
    with configure_scope() as scope:
        # Tag error reports with the project id for traceability.
        scope.user = {'id': objectId}

        # Get number of threads to use from the server-side config.
        config = Config.get()
        nthreads = config['threads']

        code = args.code

        # Get project with specified objectId.
        Project = Object.factory('Project')
        project = Project.Query.get(objectId=objectId)

        # Run QC.
        run_qc(project, code=code, nthreads=nthreads)

        # Archive the project when requested.
        if args.archive:
            archive_path = archive_project(project,
                                           Config.get()['projectArchive'])
Example #21
0
def _find_reference_files(reference_path):
    """Return (bed, annotation, cdna, dna) paths found directly inside
    *reference_path*, with ``None`` for any file that is missing.

    Classification follows the established naming convention: ``*.bed`` for
    the BED file, and ``_annotation`` / ``_cdna`` / ``_dna`` substrings for
    the rest, checked in that order.
    """
    bed = None
    annotation = None
    cdna = None
    dna = None
    for fname in os.listdir(reference_path):
        path = os.path.join(reference_path, fname)
        if fname.endswith('.bed'):
            bed = path
        elif '_annotation' in fname:
            annotation = path
        elif '_cdna' in fname:
            cdna = path
        elif '_dna' in fname:
            dna = path
    return bed, annotation, cdna, dna


def _get_or_create_organism(Organism, organisms, genus, species,
                            species_path):
    """Fetch the existing Organism for (genus, species), or create and save
    a new one, keeping the local *organisms* cache in sync.

    Returns the Organism instance.
    """
    if genus not in organisms or species not in organisms[genus]:
        organism = Organism(genus=genus,
                            species=species,
                            path=species_path)
        organism.save()
        organisms.setdefault(genus, {})[species] = organism
        return organism

    # Otherwise, the organism already exists on the server: fetch the
    # (expected-unique) match.
    found = Organism.Query.filter(genus=genus, species=species)
    assert (len(found) == 1)
    return found[0]


def organismNew():
    '''
    Method to scan for new organisms.

    Walks <dataPath>/<organismDir>/<genus>/<species>/<version>/<referenceDir>
    on disk and registers any Organism and Reference objects the server does
    not yet know about. New References are saved with indexBuilt=False and
    ready=False, so indices can be built later.

    Returns a JSON response {'status': 'done'}.
    '''
    print('scanning for new organisms', file=sys.stderr)

    config = Config.get()
    data_path = config['dataPath']
    reference_dir = config['referenceDir']
    kallisto_dir = config['kallistoIndexDir']
    bowtie_dir = config['bowtieIndexDir']
    organism_dir = config['organismDir']
    organism_path = os.path.join(data_path, organism_dir)

    # Make the directory in case it doesn't exist.
    os.makedirs(organism_path, exist_ok=True)

    # genus -> species -> Organism mapping of everything already registered.
    organisms = Function('getOrganismsDict')()['result']

    # The Parse class factories are loop-invariant; hoist them out of the
    # directory walk instead of re-creating them per version.
    Organism = Object.factory('Organism')
    Reference = Object.factory('Reference')

    for genus in os.listdir(organism_path):
        genus_path = os.path.join(organism_path, genus)
        if not os.path.isdir(genus_path):
            continue

        for species in os.listdir(genus_path):
            species_path = os.path.join(genus_path, species)
            if not os.path.isdir(species_path):
                continue

            for version in os.listdir(species_path):
                version_path = os.path.join(species_path, version)
                reference_path = os.path.join(version_path, reference_dir)
                if not os.path.isdir(reference_path):
                    continue

                organism = _get_or_create_organism(
                    Organism, organisms, genus, species, species_path)

                # Skip reference versions that are already registered.
                references = organism.relation('references').query()
                versions = [reference.version for reference in references]
                if version in versions:
                    continue

                bed, annotation, cdna, dna = _find_reference_files(
                    reference_path)
                if not (bed and annotation and cdna and dna):
                    # Incomplete reference set — ignore this version.
                    continue

                print('found {}-{}-{}'.format(genus, species, version),
                      file=sys.stderr)

                index_prefix = '{}_{}_{}'.format(genus, species, version)
                kallisto_index_path = os.path.join(
                    version_path, kallisto_dir, index_prefix + '.idx')
                bowtie_index_path = os.path.join(
                    version_path, bowtie_dir, index_prefix)

                # Paths the downstream pipeline needs for this reference.
                paths = {
                    'root': version_path,
                    'dna': dna,
                    'cdna': cdna,
                    'bed': bed,
                    'annotation': annotation,
                    'kallistoIndex': kallisto_index_path,
                    'bowtieIndex': bowtie_index_path
                }

                # Register the new reference and link it to the organism.
                reference = Reference(version=version,
                                      organism=organism,
                                      paths=paths,
                                      indexBuilt=False,
                                      ready=False)
                reference.save()
                organism.relation('references').add([reference])

    return jsonify({'status': 'done'})