def main(gbdir, outdir):
    os.makedirs(gbdir, exist_ok=True)
    os.makedirs(outdir, exist_ok=True)
    tempq = 'tempquery.fasta'
    tempdb = 'tempdb.fasta'
    for org in tqdm(Organism.objects.all()):
        # get genbank and convert to fasta
        fpath = os.path.join(gbdir, '{}.gb'.format(org.accession))
        if not os.path.isfile(fpath):
            print('\nFetching {} with accession {}'.format(
                org.name, org.accession))
            fetch(fpath)
        SeqIO.convert(fpath, 'genbank', tempdb, 'fasta')
        # get spacers of organism and convert to fasta
        spacers = Spacer.objects.filter(loci__organism=org)
        fastatext = ''.join(['>{}\n{}\n'.format(spacer.id, spacer.sequence)
                             for spacer in spacers])
        with open(tempq, 'w') as f:
            f.write(fastatext)
        # run blast and save output
        outpath = os.path.join(outdir, '{}.json'.format(org.accession))
        commandargs = ['blastn', '-query', tempq, '-subject', tempdb,
                       '-out', outpath, '-outfmt', '15']
        subprocess.run(commandargs, stdout=subprocess.DEVNULL)
    os.remove(tempq)
    os.remove(tempdb)
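# A minimal, hypothetical entry point for the function above; main() itself does
# not define one. 'gbfiles' matches the default used by fetch_genbank_files(),
# while 'blastoutput' is an assumed name for the BLAST JSON output directory.
if __name__ == '__main__':
    main(gbdir='gbfiles', outdir='blastoutput')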
def get_coupon_info(self):
    """
    Fetch product coupon information.
    :param url: request url
    :param self.second_id: product category id
    :return:
    """
    # read the urls stored in links.bin
    urls = read_file_to_url(LINKS_BIN)
    for url in urls:
        m = url.replace("\n", "").split("-")
        url = m[0]
        first_id = m[1]
        second_id = self.get_id_for_url(url)
        page = 0
        while True:
            try:
                resp = fetch(self.get_url.format(id=second_id, page=page))
            except RequestException as e:
                resp = fetch(self.get_url.format(id=second_id, page=page))
                log.logging.info('[warn] ineffective:{0}'.format(e))
            if resp.text[0] == "<" or len(
                    resp.json().get('data')['coupon_list']) == CouponList.ZERO:
                log.logging.info('[INFO] Get {0} success'.format(second_id))
                break
            else:
                if resp:
                    try:
                        if resp.json().get('data'):
                            log.logging.info('[INFO] page {0}'.format(page))
                            coupon = Coupon()
                            for info in resp.json().get('data')['coupon_list']:
                                coupon.second_id = second_id
                                coupon.first_id = first_id
                                coupon.title = info['title']
                                coupon.price = info['raw_price']
                                coupon.url = info['url']
                                coupon.thumbnail_pic = info['thumbnail_pic']
                                if Goods.save_coupon(coupon):
                                    log.logging.info(
                                        '[INFO] {0} saved to database'.format(
                                            coupon.title))
                                else:
                                    log.logging.info(
                                        '[INFO] {0} already exists'.format(
                                            coupon.title))
                            page += 1
                        else:
                            log.logging.error('[ERROR] {0}'.format(resp.text))
                    except Exception as e:
                        log.logging.error('[ERROR] {0}'.format(e))
                else:
                    log.logging.info('[ERROR] resp is None')
        # once a url has been processed, remove it from the file
        delete_line(LINKS_BIN, url)
def get_spacerrepeatfiles():
    spath = os.path.join(DATA_DIR, "spacerdatabase.txt")
    surl = 'http://crispr.i2bc.paris-saclay.fr/crispr/BLAST/Spacer/Spacerdatabase'
    rpath = os.path.join(DATA_DIR, "repeatdatabase.txt")
    rurl = 'http://crispr.i2bc.paris-saclay.fr/crispr/BLAST/DR/DRdatabase'
    fetch.fetch(spath, surl)
    fetch.fetch(rpath, rurl)
    return spath, rpath
def fetchgenbankfiles(gbdir='gbfiles'):
    os.makedirs(gbdir, exist_ok=True)
    for org in tqdm(Organism.objects.all()):
        fpath = os.path.join(gbdir, '{}.gb'.format(org.accession))
        if os.path.isfile(fpath):
            continue
        print('\nFetching {} with accession {}'.format(org.name, org.accession))
        # fpath already includes the '{accession}.gb' filename, so pass it directly
        fetch(fpath)
def get_spacer_repeat_files():
    spath = os.path.join(DATA_DIR, "spacerdatabase.txt")
    surl = ('http://crispr.i2bc.paris-saclay.fr/'
            'crispr/BLAST/Spacer/Spacerdatabase')
    rpath = os.path.join(DATA_DIR, "repeatdatabase.txt")
    rurl = 'http://crispr.i2bc.paris-saclay.fr/crispr/BLAST/DR/DRdatabase'
    fetch.fetch(spath, surl)
    fetch.fetch(rpath, rurl)
    return spath, rpath
def update():
    if not os.path.isdir(STAT_PATH):
        os.mkdir(STAT_PATH)

    # Build the problem mapping: display id -> real id.
    inputstr = '{'
    result = json.loads(fetch("get", "problem?offset=0&limit=200", "{}").text)
    counter_1 = 1
    for i in range(0, len(result['data']['results'])):
        real_id = result['data']['results'][i]['id']
        display_id = result['data']['results'][i]['_id'].replace(" ", "_")
        if counter_1 != 1:
            inputstr += ','
        inputstr += '"' + str(display_id) + '":{"_id":"' + str(real_id) + '"}'
        counter_1 += 1
    inputstr += '}'
    with open(os.path.join(STAT_PATH, "problem_mapping.json"), 'w') as f:
        f.write(inputstr)
    print("Updated problems successfully!")

    # Build the assignment mapping from the open contests.
    inputstr = '{'
    result = json.loads(
        fetch("get", "contests?offset=0&limit=10&status=0", "{}").text)
    counter = 1
    for i in range(0, len(result['data']['results'])):
        contestid = result['data']['results'][i]['id']
        payload = {"contest_id": str(contestid)}
        endpoint = "contest/problem?contest_id=" + str(contestid)
        result2 = json.loads(fetch("get", endpoint, payload).text)
        if result2["error"] == "error":
            print("Error : " + result2["data"])
            continue
        q_string3 = result['data']['results'][i]["title"]
        q_string2 = ""
        for q1 in q_string3.split(" "):
            try:
                q_string2 += q1 + " "
            except Exception:
                q_string2 += "XX "
        q_string = result2['data'][0]['_id']
        _pid = q_string.split()[0] + "+" + q_string.split()[1]
        print("Found HomeWork: " + "hw" + str(counter) + " [" + q_string2 + "]")
        if counter != 1:
            inputstr += ','
        inputstr += ('"hw' + str(counter) + '":{"contest_name":"' +
                     str(q_string2) + '","contest_id":' + str(contestid) +
                     ',"contest_problem_id":"' + str(_pid) +
                     '","problem_id":' + str(result2["data"][0]["id"]) + '}')
        counter += 1
    inputstr += '}'
    with open(os.path.join(STAT_PATH, "assign_mapping.json"), 'w') as f:
        f.write(inputstr)
    print("Updated assignments successfully!")
def fetch_genbank_files(gbdir='gbfiles'):
    os.makedirs(gbdir, exist_ok=True)
    for org in tqdm(Organism.objects.all()):
        fpath = os.path.join(gbdir, '{}.gb'.format(org.accession))
        if os.path.isfile(fpath):
            continue
        print('\nFetching {} with accession {}'.format(
            org.name, org.accession))
        # fpath already includes the '{accession}.gb' filename, so pass it directly
        fetch(fpath)
def get_category(self, url):
    """
    Fetch the sub-categories of a first-level category.
    :param url: url of the first-level category
    :return:
    """
    try:
        resp = fetch(url).text
    except RequestException as e:
        resp = fetch(url).text
        log.logging.info('[warn] ineffective:{0}'.format(e))
    html = etree.HTML(resp)
    childs = html.xpath('/html/body/div[4]/div[1]/div/a')
    for rc in childs[::-1]:
        log.logging.info('[INFO] Get url: {0} >>> {1}'.format(
            rc.attrib['href'], rc.text))
        url = urllib.parse.urljoin(url, rc.attrib['href'])
        url_join = url + "-" + self.first_id
        if self.get_id_for_url(url):
            write_links(LINKS_BIN, url_join)
    return
def get(assign_name):
    with open(os.path.join(STAT_PATH, "assign_mapping.json"), "rt") as json_in:
        assign_to_config = json.load(json_in)
    if assign_name not in assign_to_config:
        print("Invalid Assign Number!")
        print("Available names are:")
        for hwmap in assign_to_config:
            print("- " + hwmap + " [" +
                  assign_to_config[hwmap]['contest_name'] + "]")
        print("If you want to update the latest homework assignment, "
              "type: [oj update] to update.")
        return
    contest_id, problem_id = (
        assign_to_config[assign_name]["contest_id"],
        assign_to_config[assign_name]["contest_problem_id"],
    )
    endpoint = "contest/problem?contest_id={}&problem_id={}".format(
        contest_id, problem_id)
    result = json.loads(fetch("get", endpoint, {}).text)
    data = result["data"]
    if not data:
        print("Unexpected Error with Server")
        return
    try:
        samples = data["samples"]
    except (KeyError, TypeError):
        print("Unexpected Error in Parsing Response")
        print(data)
        return
    if "C" in data["template"]:
        template = data["template"]["C"]
    else:
        template = "#include <stdio.h>\n\nint main() {\n \n return 0;\n}\n"
    dir_name = data["_id"].split(' ')[1]
    print("Made a [{}] folder in your current directory.".format(dir_name))
    template_path = dir_name + "/main.c"
    if not os.path.isdir(dir_name):
        os.mkdir(dir_name)
    with open(template_path, "wt") as fout:
        fout.write(template)
    # write each sample as numbered .in/.out files next to the template
    for idx, sample in enumerate(samples):
        sample_num = idx + 1
        input_sample_path = dir_name + "/" + "{}.in".format(sample_num)
        with open(input_sample_path, "wt") as fout:
            fout.write(sample["input"])
        output_sample_path = dir_name + "/" + "{}.out".format(sample_num)
        with open(output_sample_path, "wt") as fout:
            fout.write(sample["output"])
def submit(assign_number, filename):
    with open(os.path.join(STAT_PATH, "assign_mapping.json"), "rt") as json_in:
        assign_to_config = json.load(json_in)
    if assign_number not in assign_to_config:
        print("Invalid Assign Number!")
        print("Available names are:")
        for hwmap in assign_to_config:
            print("- " + hwmap + " [" +
                  assign_to_config[hwmap]['contest_name'] + "]")
        print("If you want to update the latest homework assignment, "
              "type: [oj update] to update.")
        return
    contest_id, problem_id = (
        assign_to_config[assign_number]["contest_id"],
        assign_to_config[assign_number]["problem_id"],
    )
    try:
        with open(filename, "r") as fin:
            code = fin.read()
    except IOError:
        print('File "' + filename + '" does not exist!')
        return
    payload = {
        "problem_id": problem_id,
        "language": "C",
        "code": code,
        "contest_id": contest_id,
    }
    try:
        submission_response = json.loads(
            fetch("post", "submission", payload).text)
    except ValueError:
        print("No response is received! Please contact class TA!")
        return
    response_data = submission_response["data"]
    if response_data == "The contest have ended":
        print("The contest has ended.")
        return
    try:
        submission_id = response_data["submission_id"]
    except TypeError:
        if submission_response["error"] == "invalid-code":
            print("You can't submit an empty file.")
            return
        print("Unknown error occurred!")
        return
    print("Submitted successfully!\n"
          "Getting submission status...")
    time.sleep(1.0)
    status(submission_id)
def init_category(self):
    """
    Fetch all first-level categories.
    :return:
    """
    log.logging.info('[INFO] Get category')
    url = self.site_url
    try:
        res = fetch(url).text
    except RequestException as e:
        res = fetch(url).text
        log.logging.info('[warn] ineffective:{0}'.format(e))
    html = etree.HTML(res)
    # first-level categories
    root_brother = html.xpath("/html/body/div[3]/div/div/a")
    # request url of every first-level category
    for rb in root_brother[::-1]:
        log.logging.info('[INFO] Get url: {0} >>> {1}'.format(
            rb.attrib['href'], rb.text))
        url = urllib.parse.urljoin(url, rb.attrib['href'])
        self.first_id = self.get_id_for_url(url)
        self.get_category(urllib.parse.urljoin(url, rb.attrib['href']))
def login():
    if not os.path.isdir(STAT_PATH):
        os.makedirs(STAT_PATH)
    if not os.path.isdir(COOKIE_FOLDER_PATH):
        os.makedirs(COOKIE_FOLDER_PATH)
    # The credential-reading lines were redacted in the source; the prompts are
    # preserved, but the payload keys below are an assumed reconstruction.
    username = input("username: ")
    password = input("password: ")
    value = {"username": username, "password": password}
    r = fetch("post", "login", value)
    status = json.loads(r.text)
    print(status["data"])
    if not status["error"]:
        with open(os.path.join(COOKIE_FOLDER_PATH, "oj_cookies"), "w") as f:
            f.write('{"csrftoken": "' + r.cookies["csrftoken"] +
                    '" , "sessionid" : "' + r.cookies["sessionid"] + '"}')