def get_suga_info(self, distinct=True, **renames):
    """Fetch the PBS_SUGA_MST price table as a Listorm.

    When *distinct* is true, only the newest row (by STR_YMD) per
    SUGA_CD is kept; *renames* column renames are applied last either way.
    """
    self.cursor.execute(
        "SELECT GNL_PNT_UNITPRC, IC_PNT_UNITPRC, SKUB_PNT_UNITPRC, "
        "STR_YMD, END_YMD, SUGA_CD FROM PBS_SUGA_MST")
    rows = Listorm(list(self.cursor.fetchall()))
    if not distinct:
        return rows.rename(**renames)
    return rows.orderby('-STR_YMD').distinct('SUGA_CD').rename(**renames)
def queryset_to_file(queryset, filename, selects=None, to='excel'):
    """Serialize a queryset into an excel (default) or csv file response."""
    if selects:
        records = queryset.values(*selects)
    else:
        records = queryset.values()
    # NOTE: 'nomalize' is the keyword spelling the Listorm API expects.
    table = Listorm(records, nomalize=False)
    content = table.to_excel() if to == 'excel' else table.to_csv()
    return file_response(content, filename)
def _load(self):
    """Populate ``self.object_list`` from the JSON file at ``self.path``.

    If the file does not exist yet, seed it with ``self.initial`` and
    recurse once to load what was just written.
    """
    if os.path.isfile(self.path):
        with open(self.path) as fp:
            self.object_list = Listorm(json.loads(fp.read()))
    else:
        with open(self.path, 'w') as fp:
            fp.write(json.dumps(self.initial, indent=4))
        # The file now exists, so the recursion terminates after one level.
        self._load()
def get_drug_detail(drug_code, ret_kpic=False):
    """Collect API detail records for *drug_code*.

    With *ret_kpic*, additionally scrape the KPIC detail page and
    left-join its records onto the API records by drug_code.
    """
    api_content = retrieve_search_drug.get_detail_api(drug_code)
    records = Listorm(parse_detail_api.parse(api_content))
    if not ret_kpic:
        return records
    kpic_html = retrieve_search_drug.get_detail(drug_code)
    kpic_records = Listorm(parse_detail.parse(kpic_html, drug_code))
    if not kpic_records:
        return records
    return kpic_records.join(records, on='drug_code', how='left')
def get_edi_code_from_gosi(xl_file):
    """Scan every cell of every sheet in *xl_file* for EDI codes.

    An EDI code is one uppercase letter or digit followed by 8 digits.

    :param xl_file: path to an .xls workbook readable by xlrd.
    :returns: Listorm of {'시트이름': sheet name, 'EDI코드': code} records.
    """
    # FIX: raw string — '\d' in a plain literal is an invalid escape
    # (SyntaxWarning since Python 3.12).
    re_compile_edi = re.compile(r'[A-Z\d]\d{8}')
    edis = Listorm()
    wb = xlrd.open_workbook(xl_file)
    for sheet_index in range(wb.nsheets):
        sheet = wb.sheet_by_index(sheet_index)
        for r in range(sheet.nrows):
            for cell in sheet.row(r):
                for edi in re_compile_edi.findall(str(cell.value)):
                    edis.append({'시트이름': sheet.name, 'EDI코드': edi})
    # 'edis' is already a Listorm; the original re-wrapped it needlessly.
    return edis
def parse_detail_thread(*detail_urls):
    """Fetch and parse detail pages concurrently.

    :param detail_urls: URLs to fetch with ``get_detail_soup``.
    :returns: Listorm of professional-use records, ordered by 'prd_nm_kor'.
    """
    # FIX: with no URLs, min(MAX_WORKER, 0) == 0 and
    # ThreadPoolExecutor(0) raises ValueError — return empty instead.
    if not detail_urls:
        return Listorm()
    records = []
    with ThreadPoolExecutor(min(MAX_WORKER, len(detail_urls))) as executor:
        todo_list = [executor.submit(get_detail_soup, url)
                     for url in detail_urls]
        print('Parsing details...')
        for done in tqdm(as_completed(todo_list), total=len(detail_urls)):
            detail_soup = done.result()
            records.append(parse_detail_soup(detail_soup))
    lst = Listorm(records).filter(where=lambda row: row.isprofession)
    return lst.orderby('prd_nm_kor')
def drug_picture_view(records, columns=5, env=JINJA_ENV, output_html=None):
    """Render de-duplicated drug records into the picture-grid template.

    Writes the HTML to *output_html* when given; otherwise returns it.
    """
    unique = Listorm(records).distinct('drug_code')
    template = env.get_template('drug_pictures.html')
    object_lists = [unique[i:i + columns]
                    for i in range(0, len(unique), columns)]
    html = template.render(object_lists=object_lists)
    if not output_html:
        return html
    with open(output_html, 'wt', encoding='utf-8') as fp:
        fp.write(html)
def get_tables_from_gosi(scheme, xl_file=None, **kwargs):
    """Extract rows matching *scheme* columns from a 고시 Excel workbook.

    Each sheet is scanned for a header row containing every column named
    in *scheme*; subsequent rows contribute their non-empty scheme values,
    each record tagged with its sheet name under '시트명'.

    :param scheme: iterable of required column headers.
    :param xl_file: workbook path; when omitted, **kwargs are forwarded
        to ``xlrd.open_workbook`` (e.g. file_contents=...).
    :returns: Listorm of the extracted records.
    """
    # FIX: removed an unused compiled EDI regex (with a non-raw '\d'
    # escape) and a dead commented-out retrieve_drug_info() stub that
    # trailed this function.
    wb = xlrd.open_workbook(xl_file) if xl_file else xlrd.open_workbook(
        **kwargs)
    records = []
    for nsheet in range(wb.nsheets):
        ws = wb.sheet_by_index(nsheet)
        sheet_name = ws.name
        columns = []
        for nrow in range(ws.nrows):
            row_values = [cell.value for cell in ws.row(nrow)]
            # The first row containing every scheme column becomes the header.
            if not columns and set(scheme) <= set(row_values):
                columns = row_values
                continue
            row = dict(zip(columns, row_values))
            if not row:
                continue
            # Keep only non-empty values of the requested columns.
            record = {k: row[k] for k in scheme if k in row and row[k]}
            record['시트명'] = sheet_name
            records.append(record)
    return Listorm(records)
def query_save_to(user_id, password, keywords, public_ip, headers, _file,
                  start=True, oneByone=True, distinct=True, detail=True,
                  append=None, to_html=None):
    """Search DrugInfo for *keywords* and save/merge the results.

    Results are merged with records already present in *append* (if any),
    optionally de-duplicated by id, optionally rendered to HTML, and/or
    written to an Excel file which is launched on Windows when *start*
    is true.

    :returns: the resulting Listorm, or None after the file was opened
        (or after a write failure) on the Windows path.
    """
    if isinstance(keywords, str):
        keywords = [keywords]
    append_lst = open_record_file(append)
    excludes = append_lst.unique('id') if len(append_lst) > 0 else []
    dg = DrugInfoAPI(user_id, password, public_ip, headers)
    results = []
    if oneByone:
        results = dg.search_one_by_one(keywords, detail=detail,
                                       exclude_ids=excludes)
    else:
        # Batch keywords 50 at a time into a single space-joined query.
        length, step = len(keywords), 50
        todo_range = list(range(0, length, step))
        iter_range = tqdm(todo_range, total=len(todo_range))
        for page in iter_range:
            keyword = ' '.join(keywords[page:page + step])
            results += dg.search(keyword, detail=detail)
    lst = Listorm(results) + append_lst
    if distinct:
        # BUG FIX: distinct() returns a new Listorm; the original
        # discarded the result, so duplicates were never removed.
        lst = lst.distinct('id')
    if to_html:
        create_img_html(lst, _file=to_html, start=start)
    if _file or append:
        if start and platform.system() == 'Windows':
            try:
                lst.to_excel(append or _file)
                os.startfile(append or _file)
                return
            except Exception:  # narrowed from a bare except
                for row in lst:
                    print('Exception Occuer')
                    # print(row)
                return
        else:
            lst.to_excel(append or _file)
    return lst
def open_record_file(_file):
    """Load an .xls/.xlsx/.csv file into a Listorm.

    Returns an empty Listorm for a missing path or unknown extension.
    """
    if not _file:
        return Listorm()
    extension = os.path.splitext(_file)[1]
    if extension in ('.xls', '.xlsx'):
        return read_excel(_file)
    if extension == '.csv':
        return read_csv(_file)
    return Listorm()
def merge_object(self, *objects, commit=True):
    """Merge several saved collect-objects into one new instance.

    List-valued fields are concatenated, querysets are unioned, and the
    date range is widened to span all inputs. The merged instance gets a
    "-MERGED" slug suffix and a "(병합본)" title suffix, then is saved
    through ``self.db`` (optionally committing).
    """
    date = time_to_normstr(datetime.date.today())
    # NOTE(review): 'timestamp' is computed but never used below.
    timestamp = time_to_normstr(datetime.datetime.now(), to='datetime')
    concated = defaultdict(list)
    for object in objects:
        for key, val in object.items():
            if key == 'queryset':
                # Presumably a set-like union — verify Listorm.__or__.
                concated[key] = Listorm(concated[key]) | Listorm(val)
            elif isinstance(val, list):
                concated[key] += val
            else:
                concated[key].append(val)
    merged = {}
    queryset = Listorm()
    for key, val in concated.items():
        if key == 'kinds':
            merged[key] = sorted(set(val), key=lambda k: kind_order.get(k, k))
        elif key == 'types':
            merged[key] = sorted(set(val), key=lambda t: type_order.get(t, t))
        elif key == 'wards':
            merged[key] = sorted(set(val))
        elif key in ['start_date', 'start_dt']:
            # Earliest start across all merged objects.
            merged[key] = sorted(val)[0]
        elif key in ['end_date', 'end_dt']:
            # Latest end across all merged objects.
            merged[key] = sorted(val)[-1]
        elif key == 'queryset':
            queryset = val
            merged['len_queryset'] = len(val)
        elif key == 'date':
            # All merged objects get today's normalized date.
            merged[key] = date
    instance = self._generate_initial_object(**merged)
    instance['slug'] = instance['slug'] + "-MERGED"
    instance['title'] = instance['title'] + "(병합본)"
    instance['queryset'] = queryset
    self.db.save(instance, commit)
    return instance
def get_order_object_list_test(order_date):
    """Build order records from a canned sample response (test helper).

    *order_date* is accepted for signature parity but not consulted;
    the records come from a fixed sample file.
    """
    ordmon = OrdMonApiRequest(API_REQ['order']['ptnt_info'])
    ordmon.set_test_response('response_samples/jupsoo4.4.rsp')
    return Listorm(ordmon.get_records())
def get_search_list(self, keyword):
    """Search the drug site for *keyword* and parse its result tables.

    Returns a Listorm of product rows, each tagged with the distribution
    status ('유통정보') of its table and the (truncated) search term
    ('검색어'). Returns an empty Listorm when the number of status
    headers and product tables disagree.
    """
    kwd = keyword
    # The site expects CP949 (EUC-KR) URL encoding.
    keyword = quote(keyword, encoding='cp949')
    r = self.get(self.search_url + keyword)
    soup = BeautifulSoup(r.content, 'html.parser')
    table_titles = []
    product_tables = []
    # Each 'subheader' div labels the distribution status of the
    # product table that follows it.
    for subheader in soup('div', {'class': 'subheader'}):
        inuse = subheader.text.strip()
        if "유통" in inuse:
            if '유통중인' in inuse:
                inuse = "유통중"
            else:
                inuse = "미확인"
            table_titles.append(inuse)
    for table in soup('table'):
        for tr in table('tr'):
            # Skip outer tables that merely wrap nested tables.
            if table('table'):
                continue
            header = tr.text.strip().split('\n')
            # Identify product tables by their exact header row.
            if header == [
                    '제품명', '임부', '보험코드', '판매사', '성분/함량', '구분',
                    '보험', '약가', '조회수', '대체', '수정'
            ]:
                product_tables.append(table)
    if len(table_titles) != len(product_tables):
        print('유통 정보와 테이블이 맞지 않습니다')
        return Listorm()
    lst_result = Listorm()
    # Truncate long search terms for display.
    kwd = kwd[:15] + '...' if len(kwd) > 15 else kwd
    for inuse, table in zip(table_titles, product_tables):
        pw = ParseWebPage(str(table))
        lst = Listorm(pw.ext_tables('제품명', '임부', '보험코드', only_data=False))
        # NOTE(review): relies on add_columns evaluating the lambdas
        # eagerly within this iteration — confirm against Listorm docs.
        lst = lst.add_columns(유통정보=lambda row: inuse, 검색어=lambda row: kwd)
        lst_result += lst
    return lst_result
def get_context_data(self, **kwargs):
    """Aggregate stock-in amounts per drug and add them to the context.

    Adds 'object_list' (per-drug totals with computed prices) and
    'total_price' (sum of all total_price values) to the template
    context.
    """
    context = super(StockInPLVano, self).get_context_data(**kwargs)
    queryset = self.get_queryset().order_by('drug')
    if queryset.exists():
        # Distinct EDI codes present in the queryset.
        ediset = Listorm(queryset.values(
            'drug__edi').distinct()).column_values('drug__edi')
        Info = queryset.first().drug.__class__
        # Map each EDI code to its drug-info model instance.
        drugset = {edi: Info.objects.get(edi=edi) for edi in ediset}
    else:
        ediset = []
        drugset = {}
    aggset = queryset.values('buyitem__drug').annotate(
        total_amount=Sum('amount'))
    aggset = Listorm(aggset)
    # NOTE(review): assumes row.buyitem__drug values key into drugset
    # (i.e. they carry the same EDI values) — confirm against the models.
    aggset = aggset.add_columns(
        total_price=lambda row: drugset.get(row.buyitem__drug
                                            ).price * row.total_amount,
        drug=lambda row: drugset.get(row.buyitem__drug))
    context['object_list'] = aggset
    context['total_price'] = aggset.apply_column('total_price', sum)
    return context
def get_info_thread(edis):
    """Fetch detail records for every EDI code using a thread pool."""
    results = []
    with ThreadPoolExecutor(MAX_WORKER) as executor:
        futures = [executor.submit(parse_detail, edi) for edi in edis]
        progress = tqdm.tqdm(as_completed(futures), total=len(edis))
        for future in progress:
            results += future.result()
    return Listorm(results)
def save_collect(*form_cleaned_datas, test=False):
    """Normalize cleaned form data and hand it off to the Collector.

    Derives the date list, the widest start/end range, and the union of
    wards across all forms before saving.
    """
    contexts = Listorm(
        norm_context(form_data, verbosing='types')
        for form_data in form_cleaned_datas)
    dates = sorted(contexts.unique('date'))
    min_start_date = contexts.min('start_date')
    max_end_date = contexts.max('end_date')
    ward_lists = contexts.column_values('wards')
    total_wards = sorted(set(reduce(lambda acc, elem: acc + elem, ward_lists)))
    print('saving form data...')
    pprint(contexts)
    print('min_start_date: ', min_start_date)
    print('max_end_date: ', max_end_date)
    print('total_wards:', total_wards)
    print('tested:', test)
    collector = Collector()
    return collector.save(dates, min_start_date, max_end_date, total_wards,
                          *contexts, test=test)
class StaticStorage(CollectStorage):
    """File-backed storage for per-kind static settings (LABEL/INJ/NUT)."""

    # Default file content written on first load: one record per kind.
    initial = [{
        'kind': kind,
        'extras': "",
        'excludes': "",
        'exclude_groups': ""
    } for kind in ['LABEL', 'INJ', 'NUT']]

    def __init__(self, filepath=STATIC_INFO_FILE):
        super(StaticStorage, self).__init__(filepath)

    def _load(self):
        """Load object_list from disk, seeding the file with ``initial``."""
        if os.path.isfile(self.path):
            with open(self.path) as fp:
                self.object_list = Listorm(json.loads(fp.read()))
        else:
            with open(self.path, 'w') as fp:
                fp.write(json.dumps(self.initial, indent=4))
            # The file now exists, so one recursive call suffices.
            self._load()

    def save(self, kind, **kwargs):
        """Update the record for *kind* in place and persist the list."""
        self.object_list.update(where=lambda row: row.kind == kind, **kwargs)
        with open(self.path, 'w') as fp:
            fp.write(json.dumps(self.object_list, indent=4))

    def get(self, kind=None):
        """Return the record for *kind*, or the whole list when kind is None."""
        obj = self.object_list.filterand(
            kind=kind).first if kind else self.object_list
        return obj

    def as_put(self, kind):
        """Shape the *kind* record as form-friendly CRLF-joined strings."""
        obj = self.get(kind)
        # NOTE(review): assumes extras/excludes hold iterables of lines
        # once populated, although ``initial`` seeds them as strings —
        # confirm against the writers of this file.
        context = {
            'kind': kind,
            'extras': '\r\n'.join(obj.extras) + '\r\n' if obj.extras else '',
            'excludes':
            '\r\n'.join(obj.excludes) + '\r\n' if obj.excludes else ''
        }
        return context
def backup_druginfo(self, filename, columns, **renames):
    """Dump SPB_DRUG_MST to an Excel file with renamed/selected columns.

    Writes nothing when the table is empty.

    :param filename: target Excel file path.
    :param columns: column names to include in the output.
    :param renames: old_name=new_name column renames applied first.
    """
    query = "SELECT * FROM SPB_DRUG_MST"
    self.cursor.execute(query)
    records = list(self.cursor.fetchall())
    if records:
        lst = Listorm(records)
        # BUG FIX: rename() returns a new Listorm; the original discarded
        # the result (and also built an unused column-name list from
        # cursor.description), so the renames never took effect.
        lst = lst.rename(**renames)
        lst.to_excel(filename, selects=columns)
def get_drug_list_by_edi(*edi_codes, **kwargs):
    """Resolve EDI codes to detail records joined with their search rows."""
    search_rows = Listorm()
    for edi_code in edi_codes:
        search_rows += get_drug_search_list(search_bohcode=edi_code)
    details = Listorm()
    for code in search_rows.column_values('drug_cd'):
        details += get_drug_detail(code, **kwargs)
    return details.join(search_rows, left_on='drug_code', right_on='drug_cd')
def get_drug_list(*criterias, **kwargs):
    """Run each search criteria set, fetch details, and join the results.

    Returns plain dicts rather than Listorm rows.
    """
    search_rows = Listorm()
    for crit in tqdm(criterias, total=len(criterias)):
        search_rows += get_drug_search_list(**crit)
    drug_codes = search_rows.column_values('drug_cd')
    details = Listorm()
    print('collecting {} items....'.format(len(drug_codes)))
    for code in tqdm(drug_codes, total=len(drug_codes)):
        details += get_drug_detail(code, **kwargs)
    joined = details.join(search_rows, left_on='drug_code',
                          right_on='drug_cd')
    return [dict(row) for row in joined]
def get_drug_search_list(**kwargs):
    """Search the drug site and return de-duplicated result records.

    Results come in two groups (professional-use Y/N). When a group has
    more rows than the first page shows, the paginated 'more' endpoint
    is walked until every row is retrieved. Each record is tagged with
    its group via 'pro_yn'.

    :returns: Listorm distinct by 'drug_cd'.
    """
    search_result_page = retrieve_search_drug.get_search(**kwargs)
    drug_countset = parse_search_drug.get_count(search_result_page)
    ret = Listorm()
    for pro, cnt in drug_countset.items():
        if pro == 'proy':
            attr_id = 'result_recorded'
            pro_yn = 'Y'
        else:
            attr_id = 'result_unrecorded'
            pro_yn = 'N'
        records = Listorm(
            parse_search_drug.parse(search_result_page, 'article',
                                    id=attr_id))
        if cnt > len(records):
            # FIX: the original issued one extra get_search_more request
            # here whose response was immediately overwritten by the
            # pagination loop below; that wasted HTTP call is removed.
            records = Listorm()
            for page in range(1,
                              cnt // DRUG_SEARCH_MORE_FORM['rowLength'] + 2):
                search_more_result_page = retrieve_search_drug.get_search_more(
                    cnt, proYN=pro_yn, pageNo=page, **kwargs)
                records += Listorm(
                    parse_search_drug.parse(search_more_result_page,
                                            'article', id='resultMoreTable'))
        records = records.add_columns(pro_yn=lambda row: pro_yn)
        ret += records
    return ret.distinct('drug_cd')
def get_detail(self, drug_id):
    """Scrape the detail page for *drug_id* into a flat record dict.

    Requires an authenticated session; returns an empty Listorm when not
    logged in (note the asymmetric return type on that path).
    """
    if not isinstance(self.requests, requests.Session):
        return Listorm()
    record = {'id': drug_id}
    soup = BeautifulSoup(
        self.get(self.detail_url + drug_id).content, 'html.parser')
    # Ingredient names; commas stripped so the joined list stays parseable.
    components = [
        a.text.strip().replace(',', '') for a in soup(
            'a', href=re.compile(r'^/ingredient/ingre_view.aspx\?.+$'))
    ]
    record['성분자세히'] = ', '.join(components)
    find = False  # NOTE(review): assigned but never read afterwards.
    for tr in soup('tr'):
        if tr('tr'):  # skip wrapper rows containing nested tables
            continue
        if '복지부분류' in tr.text:
            if tr.td and tr.td.text.strip() == '복지부분류':
                for td in tr('td'):
                    value = td.text.strip()
                    # e.g. "123[분류명]" -> code '123', name '분류명'
                    g = re.search(
                        r'(?P<efcy_code>\d+)\[(?P<efcy_name>.+)\]', value)
                    if not g:
                        continue
                    efcy_code = g.group('efcy_code')
                    efcy_name = g.group('efcy_name')
                    if efcy_code:
                        record['복지부분류코드'] = efcy_code.strip()
                        record['복지부분류'] = efcy_name.strip()
                # Stop scanning rows once the classification row is handled.
                break
    pw = ParseWebPage(str(soup))
    for elm in pw.ext_tables('항목', '내용'):
        if elm['항목'] in ['포장·유통단위', '주성분코드']:
            record[elm['항목']] = elm['내용']
    pkg_str = record.get('포장·유통단위', '')
    componet_code = record.get('주성분코드', '')
    # Keep only the leading alphanumeric code portion.
    component_code_pattern = re.search(r'(?P<comp_code>[\dA-Z]+)\s+.+',
                                       componet_code)
    if component_code_pattern:
        record['주성분코드'] = component_code_pattern.group('comp_code').strip()
    record['포장정보'] = pkg_str or ''
    record['pkg_amount'] = self._pkg_num_from(pkg_str)
    record['마약류구분'] = self._get_narcotic_class(str(soup))
    return record
def get_label_info(self, **renames):
    """Fetch active single/pack label drugs from SPB_DRUG_MST.

    BUG FIX: the original WHERE clause
        DUSE_YN='N' AND SNG_PACK_GB='S' OR SNG_PACK_GB='P'
    groups as (DUSE_YN='N' AND SNG_PACK_GB='S') OR SNG_PACK_GB='P'
    under SQL precedence, letting discontinued 'P' rows through.
    Parentheses restore the intended "active AND (S or P)" filter.

    :param renames: old_name=new_name column renames applied to the result.
    :returns: Listorm of the matching rows.
    """
    query = ("SELECT * FROM SPB_DRUG_MST "
             "WHERE DUSE_YN='N' AND (SNG_PACK_GB='S' OR SNG_PACK_GB='P')")
    self.cursor.execute(query)
    records = list(self.cursor.fetchall())
    return Listorm(records).rename(**renames)
def get_object_list(self):
    """Return the stored objects, newest first, as a fresh Listorm."""
    newest_first = list(reversed(self.object_list))
    return Listorm(newest_first)
def clear(self):
    """Drop all stored objects and persist the now-empty list."""
    self.object_list = Listorm()
    self.save()
def _load(self):
    """Load object_list from the JSON file, or start empty if absent."""
    if os.path.isfile(self.path):
        with open(self.path) as fp:
            self.object_list = Listorm(json.loads(fp.read()))
    else:
        self.object_list = Listorm()
class CollectStorage(object):
    """JSON-file-backed list storage with a bounded persisted history."""

    def __init__(self, filepath,
                 max_object_list_length=MAX_OBJECT_LIST_LENGTH):
        self.path = filepath
        self.max_object_list_length = max_object_list_length
        self._get_or_create_dir(self.path)
        self._load()

    def _get_or_create_dir(self, path):
        """Ensure the directory that will hold *path* exists.

        FIX: the original branched on ``os.path.isdir(path)`` and then
        called ``os.makedirs`` on the already-existing directory (always
        raising, silenced by a bare ``except:``). ``exist_ok=True`` on
        the correct target achieves the intended effect without masking
        unrelated errors.
        """
        target = path if os.path.isdir(path) else os.path.dirname(path)
        if not target:
            return
        try:
            os.makedirs(target, exist_ok=True)
        except OSError:
            # Directory creation stays best-effort, as in the original.
            return

    def _load(self):
        """Load object_list from the JSON file, or start empty if absent."""
        if os.path.isfile(self.path):
            with open(self.path) as fp:
                self.object_list = Listorm(json.loads(fp.read()))
        else:
            self.object_list = Listorm()

    def save(self, object=None, commit=True):
        """Append *object* (if given) and optionally write to disk.

        Only the newest ``max_object_list_length`` records are persisted.
        Returns None after a commit, else the in-memory list.
        """
        if object:
            self.object_list.append(object)
        if commit:
            with open(self.path, 'w') as fp:
                object_list = self.object_list[-self.max_object_list_length:]
                data = json.dumps(object_list, indent=4)
                fp.write(data)
            return
        return self.object_list

    def delete(self, slug):
        """Remove the record with *slug* and persist the change."""
        self.object_list = self.object_list.filter(
            lambda row: row.slug != slug)
        self.save()

    def clear(self):
        """Drop all records and persist the now-empty list."""
        self.object_list = Listorm()
        self.save()

    def get(self, slug):
        """Return the first record whose slug matches."""
        return self.object_list.filterand(slug=slug).first

    def get_object_list(self):
        """Return all records, newest first."""
        return Listorm(self.object_list[::-1])

    def get_latest(self, **kwargs):
        """Return the top-'seq' record matching the given filters.

        Supported filters: 'date' (normalized before comparison), plus
        subset matches on 'types' / 'kinds' / 'wards'.
        """
        latest = self.object_list
        for key, val in kwargs.items():
            if key == 'date':
                date = time_to_normstr(val)
                latest = latest.filterand(date=date)
            elif key in ['types', 'kinds', 'wards']:
                latest = latest.filter(lambda row: set(row[key]) <= set(val))
        latest = latest.top('seq')
        return latest
def search(self, keyword, detail=False, exclude_ids=None):
    """Search for *keyword* and normalize the raw result table.

    Extracts the drug id, FDA pregnancy class, image URL, and numeric
    price from the parsed soup cells, strips the remaining text
    columns, and renames the unnamed image column to 'img'. Rows whose
    id is in *exclude_ids* are dropped. With *detail*, per-id detail
    records are fetched concurrently and joined on 'id' (requires an
    authenticated session).
    """
    lst = self.get_search_list(keyword)
    id_filter = re.compile(r'/detail/product.aspx\?pid=(?P<drugId>\d+)')
    fda_filter = re.compile(r'^/search2/images/(?P<fda>\w+).gif$')
    exclude = set(exclude_ids or [])
    lst = lst.exclude(where=lambda row: row.id in exclude)

    def get_drug_id(soup):
        # Returns the pid of the first matching detail link, else None.
        # print(soup)
        for a in soup('a', href=id_filter):
            return id_filter.search(a['href']).group('drugId')

    def get_title(soup):
        if soup.a:
            return soup.a.text.strip()
        else:
            return soup.text.strip()

    def get_fda(soup):
        # FDA class encoded in the icon filename; None when no icon.
        for img in soup('img', src=fda_filter):
            g = fda_filter.search(img['src'])
            return g.group('fda') if g else ''

    def get_drug_image(soup):
        for a in soup('a', {'class': 'pro-img-link'}):
            _, img = a.text.split(',')
            return urljoin(self.host + self.img_path, img)
        return ''

    def norm_price(soup):
        # Strip every non-digit character before int conversion.
        price_str = soup.text.strip() if soup.text else '0'
        regx = re.compile('[^\d]')
        try:
            return int(regx.sub('', price_str))
        except:
            return 0

    def get_strip(soup):
        return soup.text.strip() if soup.text else ''

    lst = lst.add_columns(
        id=lambda row: get_drug_id(row.제품명),
        price_int=lambda row: norm_price(row.약가),
    ).update(제품명=lambda row: get_title(row.제품명),
             임부=lambda row: get_fda(row.임부),
             **{
                 '': lambda row: get_drug_image(row['']),
                 '보험코드': lambda row: get_strip(row['보험코드']),
                 '판매사': lambda row: get_strip(row['판매사']),
                 '성분/함량': lambda row: get_strip(row['성분/함량']),
                 '구분': lambda row: get_strip(row['구분']),
                 '보험': lambda row: get_strip(row['보험']),
                 '약가': lambda row: get_strip(row['약가']),
                 '조회수': lambda row: get_strip(row['조회수']),
                 '대체': lambda row: get_strip(row['대체']),
                 '수정': lambda row: get_strip(row['수정']),
             }).rename(**{'': 'img'})
    id_list = lst.column_values('id')
    if id_list and detail:
        if not isinstance(self.requests, requests.Session):
            print('로그인 되지 않았습니다')
            return lst
        # Fan detail requests out across a thread pool.
        with ThreadPoolExecutor(min(self.MAX_WORKER, len(lst))) as executor:
            details = executor.map(self.get_detail, id_list)
            lst_detail = Listorm(details)
        return lst.join(lst_detail, on='id')
    return lst
def get_queryset(self):
    """Expose the db's stored objects, newest first, as a Listorm."""
    newest_first = list(reversed(self.db.object_list))
    return Listorm(newest_first)
def get_inj_info(self, **renames):
    """Fetch non-discontinued injectable drugs (MED_PTH='3'), renamed."""
    query = ("SELECT * FROM SPB_DRUG_MST "
             "WHERE DUSE_YN='N' AND MED_PTH='3'")
    self.cursor.execute(query)
    rows = list(self.cursor.fetchall())
    return Listorm(rows).rename(**renames)