def file_fuck(): # now = datetime.now().date() # future = datetime.strptime('01122015', "%d%m%Y").date() # dt = future-now # dt = dt.days # # if dt <= 100: # easygui_copath.msgbox('This software license has expired. Please download new program excecutalble from ', 'Title Goes Here') # #Input GUI - set initial vars openfile, out_dir, choice_site, choice_spec, encryption = input_gui() # clean headers outtexttemp = cleaner(openfile) # get chunks caselist = mapper(outtexttemp, choice_site, choice_spec) if len(caselist) == 0: os.rmdir(out_dir) quit() else: print '\nMAKING OUTPUT DIRECTORY: ' + out_dir open_diff_os.openFolder(out_dir) # reduce chunks fin_data, json_mrn, json_cases = reducer(caselist, choice_site) arr = [["JSON_CASES", json_cases], ["JSON_MRN", json_mrn], ["TABDELIM_CASES_OUT", fin_data], ["TEMP_CLEAN", outtexttemp]] if encryption == 1: crypt_dir = out_dir + "\\CRYPT" os.mkdir(crypt_dir) crypt_codes = "" for e in arr: fil = str(e[0]) + ".txt" filename = os.path.join(out_dir, fil) print 'filename: ' + filename os.open(filename, os.O_RDWR | os.O_CREAT) with open(filename, 'wb') as out: out.write(str(e[1])) if encryption == 1: pubkey, privkey = rsa.newkeys(128, poolsize=1) print str(e[0]) + " KEYS: " + str(pubkey), str(privkey) crypt_codes = str(crypt_codes) + str(e[0]) + " KEYS: " + str(pubkey), str(privkey) + "\n" filecrypt = crypt_dir + "/CRYPTO_" + e[0] + ".txt" os.open(filecrypt, os.O_RDWR | os.O_CREAT) with open(filename, 'rb') as infile, open(filecrypt, 'wb') as outfile: rsa.bigfile.encrypt_bigfile(infile, outfile, pubkey) if encryption == 1: filecrypts = crypt_dir + "/CRYPT_KEYS.txt" os.open(filecrypts, os.O_RDWR | os.O_CREAT) with open(filecrypts, 'wb') as out: out.write(str(crypt_codes))
def file_fuck(params): # now = datetime.now().date() # future = datetime.strptime('01122015', "%d%m%Y").date() # dt = future-now # dt = dt.days # # if dt <= 100: # easygui_copath.msgbox('This software license has expired. Please download new program excecutalble from ', 'Title Goes Here') # #Input GUI - set initial vars now = "".join([str(datetime.now())[0:4],str(datetime.now())[5:7], str(datetime.now())[8:10],str(datetime.now())[11:13], str(datetime.now())[14:16],str(datetime.now())[17:19], str(datetime.now())[20:len(str(datetime.now()))-1]]) openfile = params['openfile'] out_dir = '_'.join([params['out_dir'],now]) choice_site = params['choice_site'] choice_spec = params['choice_spec'] # clean headers outtexttemp = cleaner(openfile) os.mkdir(out_dir) # get chunks caselist, case_counts = mapper(outtexttemp, choice_site, choice_spec) if len(caselist) == 0: os.rmdir(out_dir) parsed_cases_counts = {'specimens':str(0), 'patients': str(0)} excel_truncation = str(0) # quit() else: print '\nMAKING OUTPUT DIRECTORY: ' + out_dir open_diff_os.openFolder(out_dir) # reduce chunks fin_data, json_mrn, json_cases, parsed_cases_counts, excel_truncation = reducer(caselist, choice_site) arr = [["JSON_CASES", json_cases], ["JSON_MRN", json_mrn], ["TABDELIM_CASES_OUT", fin_data], ["TEMP_CLEAN", outtexttemp]] for e in arr: fil = str(e[0]) + ".txt" filename = os.path.join(out_dir, fil) print 'filename: ' + filename os.open(filename, os.O_RDWR | os.O_CREAT) with open(filename, 'wb') as out: out.write(str(e[1])) return case_counts, parsed_cases_counts, excel_truncation
def copath_parse(params):
    """Parse a CoPath export into a timestamped output directory and write
    the parsed artifacts as .txt files.

    params keys used: 'openfile', 'out_dir', 'choice_site', 'choice_spec'.
    Returns (case_counts, parsed_cases_counts, excel_truncation).
    """
    # Input GUI - set initial vars
    # BUG FIX: the original sliced seven *separate* datetime.now() calls
    # to build the stamp, so components could straddle a time boundary.
    # Snapshot once; %f is 6 microsecond digits, the original kept 5.
    now = datetime.now().strftime('%Y%m%d%H%M%S%f')[:-1]

    openfile = params['openfile']
    out_dir = '_'.join([params['out_dir'], now])
    choice_site = params['choice_site']
    choice_spec = params['choice_spec']

    # clean headers
    outtexttemp = cleaner(openfile)
    os.mkdir(out_dir)

    # get chunks
    caselist, case_counts = mapper(outtexttemp, choice_site, choice_spec)
    if len(caselist) == 0:
        # nothing parsed: remove the empty dir and report zero counts
        os.rmdir(out_dir)
        parsed_cases_counts = {'specimens': str(0), 'patients': str(0)}
        excel_truncation = str(0)
    else:
        print('\nMAKING OUTPUT DIRECTORY: ' + out_dir)
        open_diff_os.openFolder(out_dir)

        # reduce chunks
        fin_data, json_mrn, json_cases, parsed_cases_counts, excel_truncation = \
            reducer(caselist, choice_site)

        arr = [['JSON_CASES', json_cases],
               ['JSON_MRN', json_mrn],
               ['TABDELIM_CASES_OUT', fin_data],
               ['TEMP_CLEAN', outtexttemp]]
        for e in arr:
            fil = str(e[0]) + '.txt'
            filename = os.path.join(out_dir, fil)
            print('filename: ' + filename)
            # BUG FIX: dropped the original os.open(..., O_RDWR | O_CREAT)
            # call -- it leaked a descriptor and open() creates the file.
            with open(filename, 'w') as out:
                out.write(str(e[1]))

    return case_counts, parsed_cases_counts, excel_truncation
def meditech_parse(params):
    """Parse a Meditech export into params['out_dir'] and write the parsed
    artifacts as .txt files.

    params keys used: 'openfile', 'out_dir', 'choice_site', 'choice_spec'.
    Returns (case_counts, parsed_cases_counts, excel_truncation).
    """
    openfile = params['openfile']
    out_dir = params['out_dir']
    choice_site = params['choice_site']
    choice_spec = params['choice_spec']

    # NOTE(review): these two scratch directories are created but never
    # referenced again in this function -- confirm whether cleaner/mapper
    # rely on them before removing the mkdir calls.
    outtext_temp = os.path.join(out_dir, 'CLEANEDTEMP')
    os.mkdir(outtext_temp)
    outtext = os.path.join(out_dir, 'CLEANED')
    os.mkdir(outtext)

    # clean headers
    outtexttemp = cleaner(openfile)

    # get chunks (the Meditech mapper takes no site argument)
    caselist, case_counts = mapper(outtexttemp, choice_spec)
    print(caselist)
    print(case_counts)

    if len(caselist) == 0:
        # nothing parsed: remove out_dir (including the scratch subdirs)
        shutil.rmtree(out_dir)
        parsed_cases_counts = {'specimens': str(0), 'patients': str(0)}
        excel_truncation = str(0)
    else:
        print('\nMAKING OUTPUT DIRECTORY: ' + out_dir)
        open_diff_os.openFolder(out_dir)

        # reduce chunks
        fin_data, json_mrn, json_cases, parsed_cases_counts, excel_truncation = \
            reducer(caselist, choice_site)

        arr = [["JSON_CASES", json_cases],
               ["JSON_MRN", json_mrn],
               ["TABDELIM_CASES_OUT", fin_data],
               ["TEMP_CLEAN", outtexttemp]]
        for e in arr:
            fil = str(e[0]) + ".txt"
            filename = os.path.join(out_dir, fil)
            print('filename: ' + filename)
            # BUG FIX: dropped the original os.open(..., O_RDWR | O_CREAT)
            # call -- it leaked a descriptor and open() creates the file.
            with open(filename, 'w') as out:
                out.write(str(e[1]))

    return case_counts, parsed_cases_counts, excel_truncation
def cyto_tissue_extract(params): now = "".join([str(datetime.now())[0:4],str(datetime.now())[5:7], str(datetime.now())[8:10],str(datetime.now())[11:13], str(datetime.now())[14:16],str(datetime.now())[17:19], str(datetime.now())[20:len(str(datetime.now()))-1]]) openfile = params['openfile'] out_dir = '_'.join([params['out_dir'],now]) choice_site = params['choice_site'] with open(openfile, "r") as txt_body: txt_body = txt_body.read() os.mkdir(out_dir) print txt_body[:1000] if choice_site == 'UM': allcases = re.findall(r'([A-Z]+(?:-\w+)*,\s[A-Z\s]+)\nSpecimen\sNumber\sAccession\sDate\sTarget\sor\sRelated\sFinal\sDiagnosis\n' 'DOB:\s\d+\/\d+\/\d{4}\sMRN:\s\w+\n([A-Z]{1,2}\d{2}-\d+\s\d+\/\d+\/\d{4}\s\d+:\d{2}\s' '(Target|Related).+?\n)(?=[A-Z]+(?:-\w+)*,\s[A-Z\s]+\nSpecimen\sNumber\sAccession\sDate\sTarget\sor\s' 'Related\sFinal\sDiagnosis\nDOB:\s\d+\/\d+\/\d{4}\sMRN:\s\w+\n)', txt_body, re.S) elif choice_site == 'JHS': allcases = re.findall(r'([A-Z]+(?:-\w+)*,[A-Z\s]+)\nSpecimen\sNumber\sAccession\sDate\sTarget\sor\sRelated\sFinal\sDiagnosis\n([A-Z]{1,2}\d{2}-\d+\s' '\d+\/\d+\/\d{4}\s\d+:\d{2}\s(Target|Related).+?\n)(?=[A-Z]+(?:-\w+)*,[A-Z\s]+\nSpecimen\sNumber\sAccession\sDate\sTarget\sor\s' 'Related\sFinal\sDiagnosis\n)', txt_body, re.S) #print allcases #exit case_dict = {} for case in allcases: all_subcases = re.findall(r'([A-Z]{1,2}\d{2}-\d+)\s(\d+\/\d+\/\d{4})\s\d+:\d{2}\s(Target|Related)(.*?)\n' '(?=$|[A-Z]{1,2}\d{2}-\d+\s\d+\/\d+\/\d{4}\s\d+:\d{2}\s(?:Target|Related).*\n)', case[1], re.S) all_subcases = [{'accession_number':x[0], 'accession_date':x[1], 'case_type':x[2], 'diagnosis':encode_decode(x[3])} for x in all_subcases] if case[0] in case_dict: for c in all_subcases: case_dict[case[0]].append(c) else: case_dict[case[0]] = all_subcases case_dict = cleaner(case_dict, choice_site) with open(os.path.join(out_dir,'out_multiple_row.csv'), 'wb') as out_file: fileWriter = csv.writer(out_file) row = ['NAME', 'ACCESSION_DATE', 'TYPE', 'ACCESSION_NUMBER', 'DIAGNOSIS', 
'EVENT_FILTER_1','EVENT_FILTER_2'] fileWriter.writerow(row) count = 0 for e in case_dict: for c in case_dict[e]: row = [e]+[c[x].strip() for x in c] + [''.join(["=IF(ISERROR(SEARCH(F$1,$E", str(count+2), ",1)),0,1)"]), ''.join(["=IF(ISERROR(SEARCH(G$1,$E", str(count+2), ",1)),0,1)"])] fileWriter.writerow(row) count += 1 with open(os.path.join(out_dir,'out_single_row.csv'), 'wb') as out_file: fileWriter = csv.writer(out_file) for e in case_dict: row_blob = [e] for c in case_dict[e]: row_blob = row_blob + [c[x].strip() for x in c] fileWriter.writerow(row_blob) #print case_dict with open(os.path.join(out_dir,'out_data_elastic.json'), 'wb') as out_file: json_str = json.dumps(case_dict) out_file.write(json_str) open_diff_os.openFolder(out_dir)