def calculate(self, student_essay):
    """Compute the average words-per-sentence ratio of *student_essay*.

    Sentences are counted as the number of '.' characters; words are
    counted as single-space-separated tokens (the original heuristic,
    preserved).  The ratio is stored on ``self.result``.

    Args:
        student_essay: the raw essay text.

    Raises:
        ValueError: if the essay contains no '.' at all — the original
            code crashed here with an uninformative ZeroDivisionError.
    """
    e1 = Essay()
    # len(text.split('.')) - 1 == count of '.' occurrences.
    e1.sentence = len(student_essay.split(".")) - 1
    e1.word = len(student_essay.split(" "))
    if e1.sentence == 0:
        # Bug fix: guard the division below against zero sentences.
        raise ValueError("essay contains no sentences (no '.' found)")
    self.result = e1.word / e1.sentence
def sample_stats_json_reader(essay_path):
    """Read an essay state file and collect per-sample mapping statistics.

    Parameters
    ----------
    essay_path : path to the essay JSON consumed by ``Essay.read_essay``.

    Returns
    -------
    tuple of (sorted sample id list, list of per-sample mapping-stats
    dicts, seq_platform, sequencer, size).

    NOTE(review): ``seq_platform``, ``sequencer`` and ``size`` are
    reassigned on every loop iteration, so the returned values belong to
    the *last* sample only; if the essay has no samples they are never
    bound and the return statement raises UnboundLocalError.  Confirm
    whether last-sample semantics are intended before refactoring.
    """
    # Create essay object and load its state from disk.
    essay = Essay()
    # Read essay
    essay.read_essay(essay_path)
    # Get sample list (sorted for deterministic output order).
    essay_sample_id_list = sorted(essay.get_samples())
    # Create empty sample_structure
    mapping_stats_sample_list = []
    # First pass: resolve each sample's performance file and metadata.
    for sample in essay_sample_id_list:
        mapping_performance_file = essay.get_arg(info_type='tree',
                                                 folder_type='analysis',
                                                 keys=[
                                                     'stats',
                                                     '{0}'.format(sample),
                                                     'replicate1',
                                                     'mapping_performance.txt'
                                                 ])
        seq_platform = essay.get_arg(
            info_type='samples',
            folder_type='{0}'.format(sample),
            keys=['replicates', 'replicate1', 'seq_platform'])
        sequencer = essay.get_arg(
            info_type='samples',
            folder_type='{0}'.format(sample),
            keys=['replicates', 'replicate1', 'sequencer'])
        size = essay.get_arg(info_type='samples',
                             folder_type='{0}'.format(sample),
                             keys=['replicates', 'replicate1', 'size'])
        mapping_stats_sample_list.append({
            sample: {
                'mapping_performance': mapping_performance_file,
                'mapping_stats': []
            }
        })
    # Second pass: parse each performance file into its stats list
    # (extends the list in place inside mapping_stats_sample_list).
    for sample_item in mapping_stats_sample_list:
        for sample_id in sample_item.keys():
            mapping_performance_file = sample_item.get(sample_id).get(
                'mapping_performance')
            mapping_stats = sample_item.get(sample_id).get('mapping_stats')
            mapping_stats.extend(get_mapping_stats(mapping_performance_file))
    return essay_sample_id_list, mapping_stats_sample_list, seq_platform, sequencer, size
def read_in_essays(training=True, cwd="", debug=False):
    """Read essay text files from disk and wrap them in Essay objects.

    Args:
        training: if True, read the scored training set from the
            "low"/"medium"/"high" subfolders (scored 1, 2, 3); otherwise
            read the unscored test set (all scored 1).
        cwd: path prefix prepended to the "input/..." directories.
        debug: if True, return only the first essay instead of the list.

    Returns:
        A list of Essay instances (or a single Essay when ``debug``).
    """
    # Renamed from ``type`` to avoid shadowing the builtin.
    data_kind = "training" if training else "test"
    print("Reading in essays from " + data_kind + " data")
    essays = []
    if data_kind == "training":
        for i, score in enumerate(["low", "medium", "high"]):
            filenames = glob.glob(cwd + "input/" + data_kind + "/" + score + "/*.txt")
            essays.extend([Essay(f, file_to_string(f), i + 1) for f in filenames])
    else:
        # Bug fix: the test branch previously ignored ``cwd``, unlike the
        # training branch above.
        filenames = glob.glob(cwd + "input/" + data_kind + "/tokenized/*.txt")
        # List comprehension (not lazy map) so len(essays) below works on Py3.
        essays = [Essay(f, file_to_string(f), 1) for f in filenames]
    print(str(len(essays)) + " essays read from " + data_kind + " data")
    return essays[0] if debug else essays
def essay_local(file=None):
    """Flask handler: render a local markdown file as a visual essay.

    Args:
        file: filesystem path of the markdown file to render.

    Returns:
        A ``(body, status, headers)`` tuple — the essay HTML wrapped with
        the Vue app on success, or a plain 404 when the file is missing.
    """
    # Bug fix: the kwargs dict was built twice in a row; once is enough.
    kwargs = {k: request.args.get(k) for k in request.args}
    logger.info(f'essay_local: file={file} kwargs={kwargs}')
    _set_logging_level(kwargs)
    markdown = get_local_markdown(file)
    if markdown:
        essay = Essay(html=markdown_to_html5(markdown), cache=cache, **kwargs)
        return (add_vue_app(essay.soup, VE_JS_LIB), 200, cors_headers)
    else:
        return 'Not found', 404
def sample_stats_json_reader(essay_path):
    """Read an essay state file and collect per-sample mapping statistics.

    Args:
        essay_path: path to the essay JSON consumed by ``Essay.read_essay``.

    Returns:
        Tuple of (sorted sample id list, list of per-sample mapping-stats
        dicts, seq_platform, sequencer, size).  The last three values come
        from the *last* sample in the sorted list (original behavior,
        preserved) and are ``None`` when the essay has no samples.
    """
    # Create essay object and load its state.
    essay = Essay()
    essay.read_essay(essay_path)
    # Sorted for deterministic output order.
    essay_sample_id_list = sorted(essay.get_samples())
    mapping_stats_sample_list = []
    # Robustness fix: previously these were only bound inside the loop, so
    # an essay with zero samples raised UnboundLocalError at the return.
    seq_platform = sequencer = size = None
    # First pass: resolve each sample's performance file and metadata.
    for sample in essay_sample_id_list:
        mapping_performance_file = essay.get_arg(
            info_type="tree",
            folder_type="analysis",
            keys=["stats", "{0}".format(sample), "replicate1", "mapping_performance.txt"],
        )
        seq_platform = essay.get_arg(
            info_type="samples", folder_type="{0}".format(sample),
            keys=["replicates", "replicate1", "seq_platform"],
        )
        sequencer = essay.get_arg(
            info_type="samples", folder_type="{0}".format(sample),
            keys=["replicates", "replicate1", "sequencer"],
        )
        size = essay.get_arg(
            info_type="samples", folder_type="{0}".format(sample),
            keys=["replicates", "replicate1", "size"],
        )
        mapping_stats_sample_list.append(
            {sample: {"mapping_performance": mapping_performance_file, "mapping_stats": []}}
        )
    # Second pass: parse each performance file into its stats list in place.
    for sample_item in mapping_stats_sample_list:
        for sample_id in sample_item:
            entry = sample_item[sample_id]
            entry["mapping_stats"].extend(get_mapping_stats(entry["mapping_performance"]))
    return essay_sample_id_list, mapping_stats_sample_list, seq_platform, sequencer, size
def main(): global options, args # Read essay state json essay = Essay() essay.read_essay(options.essay_path) print 'Launching prepare4genesys jobs for essay', essay.get_name() # Create GeneSys compatible file annotation = FileData(name=essay.get_name() + '_annotation.vcf', data_type='analysis', modifier='-i ', add_args='annotation', path=essay.get_path()) genesys_psv = FileData(name=essay.get_name() + '_GeneSys.psv', data_type='trash', modifier='-o ', add_args='prepare4genesys', path=essay.get_path()) hold_jobs = essay.submit_from_essay(job_name='genesys', command='vcf2GeneSys.py', input_data=annotation, output_data=genesys_psv, module=module) # Export result files via jenkins task (cURL) (only if not exome) if essay.get_target_name() is not None and not ( essay.get_target_name().startswith('exome') or essay.get_target_name().startswith('exoma')): if 'gluster2' in essay.get_path(): param = '\'http://*****:*****@10.0.0.82:8080/view/Bioinfo/job/Pipeta_Baming_G2/buildWithParameters?token=SOCELTOKENG2&path=' + essay.get_path() + '&project=' + essay.get_project_name() + '&analysis=' + essay.get_name() + '\'' else: param = '\'http://*****:*****@10.0.0.82:8080/view/Bioinfo/job/Pipeta_Baming_G1/buildWithParameters?token=SOCELTOKENG1&path=' + essay.get_path() + '&project=' + essay.get_project_name() + '&analysis=' + essay.get_name() + '\'' hold_jobs = essay.submit_from_essay(job_name='curl2UGM', command='curl', add_args=param, module=module, hold_jobs=hold_jobs) # Call the vcf2DBNLVar script #hold_jobs.append( # essay.submit_from_essay(job_name='vcf2DBNLVar', command='vcf2DBNLVar', input_data=annotation, # module=module)) return hold_jobs
def essay(path=None):
    """Flask handler: resolve and render a visual essay from one of several
    markdown sources (query ``src`` URL, Google Drive id, local docs tree in
    dev mode, or a GitHub repo), with per-essay HTML caching keyed by the
    markdown's sha.

    Args:
        path: the request path remainder; may encode ``acct/repo/<path>``
            depending on the serving site (see branches below).

    Returns:
        ``(body, status, headers)`` — rendered HTML, raw markdown when
        ``raw`` is requested, a 204 for CORS preflight, or a 404.
    """
    logger.info(f'essay: {path}')
    kwargs = dict([(k, request.args.get(k)) for k in request.args])
    _set_logging_level(kwargs)
    if request.method == 'OPTIONS':
        # CORS preflight: empty body, headers only.
        return ('', 204, cors_headers)
    else:
        # 'raw' / 'refresh' flags: a bare flag (empty value) counts as true.
        raw = kwargs.pop('raw', 'false') in ('', 'true')
        site = urlparse(request.base_url).hostname
        refresh = kwargs.pop('refresh', 'false').lower() in ('true', '')
        # Explicit source selection: a full Drive URL always wins as a
        # Drive id; 'src' is a direct markdown URL; other args are Drive ids.
        src = None
        gdid = None
        for arg in ('src', 'gd', 'gdid', 'gdrive'):
            if arg in kwargs:
                val = kwargs.pop(arg)
                if val.startswith('https://drive.google.com'):
                    # File id is the 6th path segment of a Drive share URL.
                    gdid = val.split('/')[5]
                elif arg == 'src':
                    src = val
                else:
                    gdid = val
        baseurl = None
        acct = None
        repo = None
        if src:
            markdown = get_markdown(src)
        elif gdid:
            markdown = get_gd_markdown(gdid)
        else:
            path_elems = path.split('/') if path else []
            logger.info(path_elems)
            if site in ('localhost', 'visual-essays.app'):
                if ENV == 'dev':
                    # Dev mode: serve from the local DOCS_ROOT checkout.
                    # acct/repo come from the path when it has >1 segment,
                    # else fall back to the configured/jstor-labs defaults.
                    acct = path_elems[0] if len(
                        path_elems
                    ) > 1 else DEFAULT_ACCT if DEFAULT_ACCT else 'jstor-labs'
                    repo = path_elems[1] if len(
                        path_elems
                    ) > 1 else DEFAULT_REPO if DEFAULT_REPO else 'visual-essays'
                    path = '/'.join(path_elems[2:]) if (
                        DEFAULT_ACCT or acct == 'jstor-labs'
                    ) and len(path_elems) > 1 else '/'.join(path_elems)
                    baseurl = 'http://localhost:5000'
                    abs_path = f'{DOCS_ROOT}/{path}'
                    logger.info(
                        f'acct={acct} repo={repo} path={path} abs_path={abs_path} is_dir={os.path.isdir(abs_path)}'
                    )
                    markdown = get_local_markdown(abs_path)
                else:
                    # Hosted mode on a known front-end site: fetch from GitHub.
                    acct = path_elems[0] if len(
                        path_elems
                    ) > 1 else DEFAULT_ACCT if DEFAULT_ACCT else 'jstor-labs'
                    repo = path_elems[1] if len(
                        path_elems
                    ) > 1 else DEFAULT_REPO if DEFAULT_REPO else 'visual-essays'
                    path = '/'.join(path_elems) if len(path_elems) == 1 or (
                        DEFAULT_ACCT and acct != DEFAULT_ACCT) else '/'.join(
                            path_elems[2:])
                    baseurl = content_baseurl(acct, repo)
                    markdown = get_gh_markdown(acct, repo, path)
            else:
                # Custom domain: acct/repo come from the KNOWN_SITES map.
                acct = KNOWN_SITES.get(site, {}).get('acct', )
                repo = KNOWN_SITES.get(site, {}).get('repo')
                baseurl = content_baseurl(acct, repo)
                markdown = get_gh_markdown(acct, repo, path)
        logger.info(
            f'essay: site={site} acct={acct} repo={repo} path={path} raw={raw} kwargs={kwargs} markdown={markdown is not None}'
        )
        if markdown:
            if raw:
                # Raw mode: return the un-rendered markdown text.
                return (markdown['text'], 200, cors_headers)
            else:
                # Cache is bypassed on explicit refresh and always on localhost.
                cache_key = f'{site}|{acct}|{repo}|{path}'
                cached = cache.get(
                    cache_key
                ) if not refresh and not site == 'localhost' else None
                logger.info(
                    f'essay: site={site} acct={acct} repo={repo} path={path} cached={cached and cached["sha"] == markdown.get("sha")}'
                )
                if cached and cached['sha'] == markdown.get('sha'):
                    # Cache hit: source sha unchanged since last render.
                    html = cached['html']
                else:
                    essay = Essay(html=markdown_to_html5(
                        markdown, site, acct, repo, path or '/'),
                                  cache=cache,
                                  baseurl=baseurl,
                                  **kwargs)
                    # Localhost uses the dev JS bundle, otherwise the CDN lib.
                    html = add_vue_app(
                        essay.soup, 'http://localhost:8080/lib/visual-essays.js'
                        if site == 'localhost' else VE_JS_LIB)
                    if not site == 'localhost' and 'url' in markdown and 'sha' in markdown:
                        cache[cache_key] = {
                            'html': html,
                            'sha': markdown['sha']
                        }
                return (html, 200, cors_headers)
        else:
            return 'Not found', 404
def main(): global options, args # Read essay state json essay = Essay() essay.read_essay(options.essay_path) print 'Launching prepare4genesys jobs for essay', essay.get_name() # Create GeneSys compatible file annotation = FileData(name=essay.get_name() + '_annotation.vcf', data_type='analysis', modifier='-i ', add_args='annotation', path=essay.get_path()) genesys_psv = FileData(name=essay.get_name() + '_GeneSys.psv', data_type='trash', modifier='-o ', add_args='prepare4genesys', path=essay.get_path()) hold_jobs = essay.submit_from_essay(job_name='genesys', command='vcf2GeneSys.py', input_data=annotation, output_data=genesys_psv, module=module) # Export result files via jenkins task (cURL) (only if not exome) if essay.get_target_name() is not None and not ( essay.get_target_name().startswith('exome') or essay.get_target_name().startswith('exoma')): if 'gluster2' in essay.get_path(): param = '\'http://*****:*****@10.0.0.82:8080/view/Bioinfo/job/Pipeta_Baming_G2/buildWithParameters?token=SOCELTOKENG2&path=' + essay.get_path( ) + '&project=' + essay.get_project_name( ) + '&analysis=' + essay.get_name() + '\'' else: param = '\'http://*****:*****@10.0.0.82:8080/view/Bioinfo/job/Pipeta_Baming_G1/buildWithParameters?token=SOCELTOKENG1&path=' + essay.get_path( ) + '&project=' + essay.get_project_name( ) + '&analysis=' + essay.get_name() + '\'' hold_jobs = essay.submit_from_essay(job_name='curl2UGM', command='curl', add_args=param, module=module, hold_jobs=hold_jobs) # Call the vcf2DBNLVar script #hold_jobs.append( # essay.submit_from_essay(job_name='vcf2DBNLVar', command='vcf2DBNLVar', input_data=annotation, # module=module)) return hold_jobs