Ejemplo n.º 1
0
    def calculate(self, student_essay):
        """Store the average number of words per sentence in ``self.result``.

        Sentences are counted as the number of "." characters in
        ``student_essay`` and words as the number of pieces obtained by
        splitting on a single space. When the text contains no "." the
        result is set to 0 instead of raising ``ZeroDivisionError``.
        """
        e1 = Essay()

        # One sentence per "." found in the text.
        e1.sentence = student_essay.count(".")

        # Splitting on a single space: runs of spaces produce empty "words",
        # which are counted as well.
        e1.word = len(student_essay.split(" "))

        # Text without any "." used to crash with a division by zero.
        if e1.sentence == 0:
            self.result = 0
        else:
            self.result = e1.word / e1.sentence
Ejemplo n.º 2
0
def sample_stats_json_reader(essay_path):
    """Collect the mapping statistics of every sample of an essay.

    The essay is loaded from ``essay_path`` with ``Essay.read_essay``.

    Returns a 5-tuple:
        * the sorted list of sample ids,
        * a list with one ``{sample: {'mapping_performance': <get_arg result>,
          'mapping_stats': [...]}}`` dict per sample, where ``mapping_stats``
          holds whatever ``get_mapping_stats`` yields for that entry,
        * ``seq_platform``, ``sequencer`` and ``size`` as read from
          ``replicate1`` of the LAST sample in sorted order, or ``None`` for
          all three when the essay has no samples.

    NOTE(review): only the last sample's platform/sequencer/size are
    returned - confirm that callers expect this.
    """
    # Create essay object and read essay
    essay = Essay()
    essay.read_essay(essay_path)

    # Get sample list
    essay_sample_id_list = sorted(essay.get_samples())

    # Bound up front so an essay without samples returns None for these
    # values instead of failing with UnboundLocalError at the return.
    seq_platform = None
    sequencer = None
    size = None

    # Create empty sample_structure
    mapping_stats_sample_list = []

    for sample in essay_sample_id_list:
        sample_key = '{0}'.format(sample)

        mapping_performance_file = essay.get_arg(
            info_type='tree',
            folder_type='analysis',
            keys=['stats', sample_key, 'replicate1',
                  'mapping_performance.txt'])

        seq_platform = essay.get_arg(
            info_type='samples',
            folder_type=sample_key,
            keys=['replicates', 'replicate1', 'seq_platform'])

        sequencer = essay.get_arg(
            info_type='samples',
            folder_type=sample_key,
            keys=['replicates', 'replicate1', 'sequencer'])

        size = essay.get_arg(
            info_type='samples',
            folder_type=sample_key,
            keys=['replicates', 'replicate1', 'size'])

        mapping_stats_sample_list.append({
            sample: {
                'mapping_performance': mapping_performance_file,
                'mapping_stats': []
            }
        })

    # Second pass: fill in the statistics once every entry has been
    # resolved (each item is a one-key dict, so .values() yields one entry).
    for sample_item in mapping_stats_sample_list:
        for sample_info in sample_item.values():
            sample_info['mapping_stats'].extend(
                get_mapping_stats(sample_info['mapping_performance']))

    return essay_sample_id_list, mapping_stats_sample_list, seq_platform, sequencer, size
Ejemplo n.º 3
0
def read_in_essays(training=True, cwd="", debug=False):
    """Load the training or the test essays as ``Essay`` objects.

    Args:
        training: when true, read ``input/training/<score>/*.txt`` for the
            scores ``low``, ``medium`` and ``high``, passing 1, 2 and 3
            respectively as the third ``Essay`` argument. Otherwise read
            ``input/test/tokenized/*.txt`` and pass 1 for every essay.
        cwd: string prepended verbatim to the ``input/`` directory, so a
            non-empty value must end with a path separator.
        debug: when true, return only the first essay.

    Returns:
        The list of essays, or a single essay when ``debug`` is true.

    Raises:
        IndexError: when ``debug`` is true and no essay file was found.
    """
    # Named ``dataset`` rather than ``type`` to avoid shadowing the builtin.
    dataset = "training" if training else "test"

    print("Reading in essays from " + dataset + " data")

    essays = []

    if training:
        for label, score in enumerate(["low", "medium", "high"], 1):
            filenames = glob.glob(cwd + "input/" + dataset + "/" + score +
                                  "/*.txt")
            essays.extend(
                [Essay(name, file_to_string(name), label)
                 for name in filenames])
    else:
        # ``cwd`` used to be ignored here, unlike in the training branch.
        filenames = glob.glob(cwd + "input/" + dataset + "/tokenized/*.txt")
        # A real list (not a lazy ``map``) so ``len`` and indexing below work
        # on Python 3 as well as Python 2.
        essays = [Essay(name, file_to_string(name), 1) for name in filenames]

    print(str(len(essays)) + " essays read from " + dataset + " data")

    return essays[0] if debug else essays
Ejemplo n.º 4
0
def essay_local(file=None):
    """Render the local markdown file ``file`` as an essay page.

    Every entry of ``request.args`` is collected into ``kwargs``, used to
    set the logging level and then forwarded to ``Essay``.

    Returns:
        ``(html, 200, cors_headers)`` when ``get_local_markdown`` returns
        something truthy for ``file``, otherwise ``('Not found', 404)``.
    """
    # Built once; the original repeated this statement on two lines.
    kwargs = {k: request.args.get(k) for k in request.args}
    logger.info(f'essay_local: file={file} kwargs={kwargs}')
    _set_logging_level(kwargs)

    markdown = get_local_markdown(file)
    if not markdown:
        return 'Not found', 404

    essay = Essay(html=markdown_to_html5(markdown), cache=cache, **kwargs)
    return (add_vue_app(essay.soup, VE_JS_LIB), 200, cors_headers)
Ejemplo n.º 5
0
def sample_stats_json_reader(essay_path):
    """Collect the mapping statistics of every sample of an essay.

    The essay is loaded from ``essay_path`` with ``Essay.read_essay``.

    Returns a 5-tuple:
        * the sorted list of sample ids,
        * a list with one ``{sample: {"mapping_performance": <get_arg result>,
          "mapping_stats": [...]}}`` dict per sample, where ``mapping_stats``
          holds whatever ``get_mapping_stats`` yields for that entry,
        * ``seq_platform``, ``sequencer`` and ``size`` as read from
          ``replicate1`` of the LAST sample in sorted order, or ``None`` for
          all three when the essay has no samples.

    NOTE(review): only the last sample's platform/sequencer/size are
    returned - confirm that callers expect this.
    """
    # Create essay object and read essay
    essay = Essay()
    essay.read_essay(essay_path)

    # Get sample list
    essay_sample_id_list = sorted(essay.get_samples())

    # Bound before the loop: with no samples the loop body never runs and
    # the return statement used to fail with UnboundLocalError.
    seq_platform = sequencer = size = None

    # Create empty sample_structure
    mapping_stats_sample_list = []

    for sample in essay_sample_id_list:
        sample_key = "{0}".format(sample)
        replicate_keys = ["replicates", "replicate1"]

        mapping_performance_file = essay.get_arg(
            info_type="tree",
            folder_type="analysis",
            keys=["stats", sample_key, "replicate1", "mapping_performance.txt"],
        )

        seq_platform = essay.get_arg(
            info_type="samples", folder_type=sample_key, keys=replicate_keys + ["seq_platform"]
        )

        sequencer = essay.get_arg(
            info_type="samples", folder_type=sample_key, keys=replicate_keys + ["sequencer"]
        )

        size = essay.get_arg(
            info_type="samples", folder_type=sample_key, keys=replicate_keys + ["size"]
        )

        mapping_stats_sample_list.append(
            {sample: {"mapping_performance": mapping_performance_file, "mapping_stats": []}}
        )

    # Second pass: each item is a one-key dict, so .values() yields exactly
    # the entry whose statistics list has to be filled.
    for sample_item in mapping_stats_sample_list:
        for sample_info in sample_item.values():
            sample_info["mapping_stats"].extend(get_mapping_stats(sample_info["mapping_performance"]))

    return essay_sample_id_list, mapping_stats_sample_list, seq_platform, sequencer, size
Ejemplo n.º 6
0
def main():
    """Submit the GeneSys export jobs for the essay at ``options.essay_path``.

    A ``genesys`` job running ``vcf2GeneSys.py`` is always submitted. When the
    essay has a target name that does not start with ``exome``/``exoma``, a
    ``curl2UGM`` job that calls a Jenkins ``buildWithParameters`` URL is
    submitted as well, holding on the first job.

    Returns:
        The value returned by the last ``submit_from_essay`` call.
    """
    global options, args

    # Read essay state json
    essay = Essay()
    essay.read_essay(options.essay_path)

    # Single-argument print(...) produces the same line on Python 2 and 3.
    print('Launching prepare4genesys jobs for essay %s' % (essay.get_name(),))

    # Create GeneSys compatible file
    annotation = FileData(name=essay.get_name() + '_annotation.vcf', data_type='analysis', modifier='-i ',
                          add_args='annotation', path=essay.get_path())
    genesys_psv = FileData(name=essay.get_name() + '_GeneSys.psv', data_type='trash', modifier='-o ',
                           add_args='prepare4genesys', path=essay.get_path())
    hold_jobs = essay.submit_from_essay(job_name='genesys', command='vcf2GeneSys.py', input_data=annotation,
                                        output_data=genesys_psv, module=module)

    # Export result files via jenkins task (cURL) (only if not exome)
    target_name = essay.get_target_name()
    if target_name is not None and not target_name.startswith(('exome', 'exoma')):
        # The two Jenkins jobs (and their tokens) differ only in this suffix.
        suffix = 'G2' if 'gluster2' in essay.get_path() else 'G1'
        # NOTE(review): the URL carries literal user-info and a job token, and
        # the query values are concatenated without URL-encoding - consider
        # moving the secrets to configuration.
        param = ('\'http://*****:*****@10.0.0.82:8080/view/Bioinfo/job/Pipeta_Baming_' + suffix +
                 '/buildWithParameters?token=SOCELTOKEN' + suffix +
                 '&path=' + essay.get_path() +
                 '&project=' + essay.get_project_name() +
                 '&analysis=' + essay.get_name() + '\'')
        hold_jobs = essay.submit_from_essay(job_name='curl2UGM', command='curl', add_args=param, module=module,
                                            hold_jobs=hold_jobs)

        # Call the vcf2DBNLVar script (currently disabled)
        #hold_jobs.append(
        #    essay.submit_from_essay(job_name='vcf2DBNLVar', command='vcf2DBNLVar', input_data=annotation,
        #                            module=module))
    return hold_jobs
Ejemplo n.º 7
0
def essay(path=None):
    """Serve an essay as HTML (or as raw markdown) for the current request.

    All entries of ``request.args`` are collected into ``kwargs``; the
    ``raw``, ``refresh``, ``src``, ``gd``, ``gdid`` and ``gdrive`` entries are
    consumed here and the remainder is forwarded to ``Essay``.

    The markdown source is chosen in this order: ``src`` (``get_markdown``),
    a Google Drive id (``get_gd_markdown``), otherwise ``path`` resolved
    according to the request hostname.

    Returns:
        * ``('', 204, cors_headers)`` for an ``OPTIONS`` request,
        * ``(markdown['text'], 200, cors_headers)`` when ``raw`` is set,
        * ``(html, 200, cors_headers)`` for a rendered essay,
        * ``('Not found', 404)`` when no markdown was found.
    """
    logger.info(f'essay: {path}')
    kwargs = dict([(k, request.args.get(k)) for k in request.args])
    _set_logging_level(kwargs)
    if request.method == 'OPTIONS':
        return ('', 204, cors_headers)
    else:
        # An empty value (a bare ``?raw``) counts as true. Unlike ``refresh``
        # below, the comparison is case-sensitive.
        raw = kwargs.pop('raw', 'false') in ('', 'true')
        site = urlparse(request.base_url).hostname
        refresh = kwargs.pop('refresh', 'false').lower() in ('true', '')
        src = None
        gdid = None
        # Every alias is popped so it is not forwarded to Essay; when several
        # are present, a later alias overwrites the gdid set by an earlier one.
        for arg in ('src', 'gd', 'gdid', 'gdrive'):
            if arg in kwargs:
                val = kwargs.pop(arg)
                if val.startswith('https://drive.google.com'):
                    # Takes element 5 of the '/'-split URL - presumably the
                    # file id of a .../file/d/<id>/... link; TODO confirm.
                    gdid = val.split('/')[5]
                elif arg == 'src':
                    src = val
                else:
                    gdid = val
        baseurl = None
        acct = None
        repo = None
        # ``src`` takes precedence over a Google Drive id.
        if src:
            markdown = get_markdown(src)
        elif gdid:
            markdown = get_gd_markdown(gdid)
        else:
            path_elems = path.split('/') if path else []
            logger.info(path_elems)
            if site in ('localhost', 'visual-essays.app'):
                if ENV == 'dev':
                    # With two or more path elements the first is the account
                    # and the second the repo; otherwise fall back to the
                    # DEFAULT_* values, then to the hard-coded names.
                    acct = path_elems[0] if len(
                        path_elems
                    ) > 1 else DEFAULT_ACCT if DEFAULT_ACCT else 'jstor-labs'
                    repo = path_elems[1] if len(
                        path_elems
                    ) > 1 else DEFAULT_REPO if DEFAULT_REPO else 'visual-essays'
                    # The first two elements are dropped from the path only
                    # when there are at least two elements and either
                    # DEFAULT_ACCT is set or acct is 'jstor-labs'.
                    path = '/'.join(path_elems[2:]) if (
                        DEFAULT_ACCT or acct == 'jstor-labs'
                    ) and len(path_elems) > 1 else '/'.join(path_elems)
                    baseurl = 'http://localhost:5000'
                    # In dev the markdown is read from DOCS_ROOT rather than
                    # through get_gh_markdown.
                    abs_path = f'{DOCS_ROOT}/{path}'
                    logger.info(
                        f'acct={acct} repo={repo} path={path} abs_path={abs_path} is_dir={os.path.isdir(abs_path)}'
                    )
                    markdown = get_local_markdown(abs_path)
                else:
                    # Same account/repo fallback chain as in the dev branch.
                    acct = path_elems[0] if len(
                        path_elems
                    ) > 1 else DEFAULT_ACCT if DEFAULT_ACCT else 'jstor-labs'
                    repo = path_elems[1] if len(
                        path_elems
                    ) > 1 else DEFAULT_REPO if DEFAULT_REPO else 'visual-essays'
                    # The full path is kept for a single-element path or when
                    # acct differs from a configured DEFAULT_ACCT; otherwise
                    # the first two elements are stripped.
                    path = '/'.join(path_elems) if len(path_elems) == 1 or (
                        DEFAULT_ACCT and acct != DEFAULT_ACCT) else '/'.join(
                            path_elems[2:])
                    baseurl = content_baseurl(acct, repo)
                    markdown = get_gh_markdown(acct, repo, path)
            else:
                # Any other hostname is looked up in KNOWN_SITES; acct and
                # repo are None when the site is not listed there.
                acct = KNOWN_SITES.get(site, {}).get('acct', )
                repo = KNOWN_SITES.get(site, {}).get('repo')
                baseurl = content_baseurl(acct, repo)
                markdown = get_gh_markdown(acct, repo, path)

            logger.info(
                f'essay: site={site} acct={acct} repo={repo} path={path} raw={raw} kwargs={kwargs} markdown={markdown is not None}'
            )

        if markdown:
            if raw:
                return (markdown['text'], 200, cors_headers)
            else:
                cache_key = f'{site}|{acct}|{repo}|{path}'
                # The cache is bypassed on an explicit refresh and always for
                # localhost.
                cached = cache.get(
                    cache_key
                ) if not refresh and not site == 'localhost' else None
                logger.info(
                    f'essay: site={site} acct={acct} repo={repo} path={path} cached={cached and cached["sha"] == markdown.get("sha")}'
                )
                # Cached HTML is reused only while its sha matches the sha of
                # the markdown just fetched.
                if cached and cached['sha'] == markdown.get('sha'):
                    html = cached['html']
                else:
                    # NOTE: this local name shadows the enclosing function's
                    # name inside this function body.
                    essay = Essay(html=markdown_to_html5(
                        markdown, site, acct, repo, path or '/'),
                                  cache=cache,
                                  baseurl=baseurl,
                                  **kwargs)
                    html = add_vue_app(
                        essay.soup,
                        'http://localhost:8080/lib/visual-essays.js'
                        if site == 'localhost' else VE_JS_LIB)
                    # Stored only for non-localhost sites and only when the
                    # markdown dict carries both 'url' and 'sha'.
                    if not site == 'localhost' and 'url' in markdown and 'sha' in markdown:
                        cache[cache_key] = {
                            'html': html,
                            'sha': markdown['sha']
                        }
                return (html, 200, cors_headers)
        else:
            # NOTE(review): unlike the other responses, this one is returned
            # without cors_headers - confirm whether that is intended.
            return 'Not found', 404
Ejemplo n.º 8
0
def main():
    """Submit the GeneSys export jobs for the essay at ``options.essay_path``.

    A ``genesys`` job running ``vcf2GeneSys.py`` is always submitted. When the
    essay has a target name that does not start with ``exome``/``exoma``, a
    ``curl2UGM`` job that calls a Jenkins ``buildWithParameters`` URL is
    submitted as well, holding on the first job.

    Returns:
        The value returned by the last ``submit_from_essay`` call.
    """
    global options, args

    # Read essay state json
    essay = Essay()
    essay.read_essay(options.essay_path)

    # Single-argument print(...) produces the same line on Python 2 and 3.
    print('Launching prepare4genesys jobs for essay %s' % (essay.get_name(),))

    # Create GeneSys compatible file
    annotation = FileData(name=essay.get_name() + '_annotation.vcf',
                          data_type='analysis',
                          modifier='-i ',
                          add_args='annotation',
                          path=essay.get_path())
    genesys_psv = FileData(name=essay.get_name() + '_GeneSys.psv',
                           data_type='trash',
                           modifier='-o ',
                           add_args='prepare4genesys',
                           path=essay.get_path())
    hold_jobs = essay.submit_from_essay(job_name='genesys',
                                        command='vcf2GeneSys.py',
                                        input_data=annotation,
                                        output_data=genesys_psv,
                                        module=module)

    # Export result files via jenkins task (cURL) (only if not exome)
    target_name = essay.get_target_name()
    if target_name is not None and not target_name.startswith(
            ('exome', 'exoma')):
        # The two Jenkins jobs (and their tokens) differ only in this suffix.
        suffix = 'G2' if 'gluster2' in essay.get_path() else 'G1'
        # NOTE(review): the URL carries literal user-info and a job token, and
        # the query values are concatenated without URL-encoding - consider
        # moving the secrets to configuration.
        param = ('\'http://*****:*****@10.0.0.82:8080/view/Bioinfo/job/Pipeta_Baming_'
                 + suffix
                 + '/buildWithParameters?token=SOCELTOKEN'
                 + suffix
                 + '&path=' + essay.get_path()
                 + '&project=' + essay.get_project_name()
                 + '&analysis=' + essay.get_name()
                 + '\'')
        hold_jobs = essay.submit_from_essay(job_name='curl2UGM',
                                            command='curl',
                                            add_args=param,
                                            module=module,
                                            hold_jobs=hold_jobs)

        # Call the vcf2DBNLVar script (currently disabled)
        #hold_jobs.append(
        #    essay.submit_from_essay(job_name='vcf2DBNLVar', command='vcf2DBNLVar', input_data=annotation,
        #                            module=module))
    return hold_jobs