        uncompfile = open(
            config.ConfigManager().PCRatesFileFolder + '/' + filename, 'wb')
        uncompfile.write(gzipdata)
        uncompfile.close()
    except BaseException as ex:
        utility.log_exception_file_and_filepath(
            ex, config.ConfigManager().PromptcloudLogFile, filepath)
    os.remove(filepath)


if __name__ == "__main__":
    utility.write_to_file(
        config.ConfigManager().PromptcloudLogFile, 'a',
        'promptcloud rates file unzip running' + ' ' +
        str(datetime.datetime.now()))
    compfile_paths = []
    compdirectory_list = []
    # put all the folder names where files are downloaded into an array
    compdirectory_list = utility.string_to_array(
        config.ConfigManager().PCRatesCompFolder, ',', compdirectory_list)
    # get the paths of all files in those folders
    compfile_paths = filemanager.directory_iterate(compdirectory_list)
    route_compfileread(compfile_paths)
    # archive and delete files so that they don't get read again
    # utility.archive_content(
    #     compfile_paths, config.ConfigManager().ArchiveDirectory)
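# For context, a minimal sketch of the decompression step that presumably
# produces `gzipdata` above -- an assumption based on the standard-library
# gzip module; `read_compressed` is a hypothetical helper, not from this repo.
import gzip

def read_compressed(filepath):
    # read a downloaded .gz file and return its decompressed bytes
    with gzip.open(filepath, 'rb') as comp:
        return comp.read()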
    print(incompletedesc)
    print(smalldesc)
    print(nonedesc)
    print(nodesc)
    print(jobsitedict)
    print(totalrecords)
    print(totaljobsdict)


if __name__ == "__main__":
    utility.write_to_file(
        config.ConfigManager().PromptcloudLogFile, 'a',
        'crawl data analysis running' + ' ' + str(datetime.datetime.now()))
    file_paths = []
    directory_list = []
    directory_list = utility.string_to_array(
        config.ConfigManager().PCFileFolder, ',', directory_list)
    file_paths = filemanager.directory_iterate(directory_list)
    # os.system("echo 'b' | sudo -S touch " + analysis_file)
    # os.system("echo 'b' | sudo -S chmod 777 " + analysis_file)
    # os.system("echo 'b' | sudo -S chown neeshu:neeshu " + analysis_file)
    # os.system("echo 'b' | sudo -S cp " + analysis_file + " /mnt/nlpdata")
    # analyzing xml
    analyze_data(file_paths)
    # capturing valid records
    valid_records()
    os.system("echo 'b' | sudo -S cp " + analysis_file + " /mnt/nlpdata")
    os.system("rm " + analysis_file)
    # utility.archive_content(
    #     file_paths, config.ConfigManager().ArchiveDirectory)
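# The copy-then-delete of `analysis_file` above shells out with a hard-coded
# sudo password. A minimal standard-library sketch of the same step, assuming
# the process can already write to /mnt/nlpdata; `publish_analysis` is a
# hypothetical helper, not from this repo.
import os
import shutil

def publish_analysis(analysis_file, dest_dir='/mnt/nlpdata'):
    # copy the analysis report to the shared mount, then remove the local copy
    shutil.copy(analysis_file, dest_dir)
    os.remove(analysis_file)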
    data_read_count += 1
    utility.write_to_file(
        config.ConfigManager().ExecutioncountFile, 'w', str(data_read_count))
    dictionaries.UpdateTemplateWhere['_id'] = configdocs[0]['_id']
    dictionaries.UpdateTemplateSet['docid_count'] = docid_count
    dictionaries.DBSet['$set'] = dictionaries.UpdateTemplateSet
    custom.update_data_to_Db_noupsert(
        int(config.ConfigManager().MongoDBPort),
        config.ConfigManager().DataCollectionDB,
        config.ConfigManager().ConfigCollection,
        dictionaries.UpdateTemplateWhere, dictionaries.DBSet, connection)


if __name__ == "__main__":
    file_paths = []
    directory_list = []
    directory_list = utility.string_to_array(
        config.ConfigManager().DirectoryList, ',', directory_list)
    file_paths = filemanager.directory_iterate(directory_list)
    route_dataread(file_paths)
    # utility.archive_content(
    #     file_paths, config.ConfigManager().ArchiveDirectory)
    # connection = dbmanager.mongoDB_connection(
    #     int(config.ConfigManager().MongoDBPort))
    # configdocs = custom.retrieve_data_from_DB(
    #     int(config.ConfigManager().MongoDBPort),
    #     config.ConfigManager().DataCollectionDB,
    #     config.ConfigManager().ConfigCollection)
    # docid_count = int(configdocs[0]['docid_count'])
    # docid_count = custom.data_from_DB(
    #     config.ConfigManager().STConnStr,
    #     config.ConfigManager().STJobQueryId,
    #     config.ConfigManager().JobDetails,
    #     config.ConfigManager().ST, docid_count)
    # docid_count = custom.data_from_DB(
    #     config.ConfigManager().STConnStr,
    #     config.ConfigManager().STCandidateQueryId,
    #     config.ConfigManager().CandidateDetails,
    #     config.ConfigManager().ST, docid_count)
    # docid_count = custom.data_from_DB(
    #     config.ConfigManager().XchangeConnStr,
    #     config.ConfigManager().XchangeJobQueryId,
    #     config.ConfigManager().JobDetails,
    #     config.ConfigManager().Xchange, docid_count)
    # docid_count = custom.data_from_DB(
    #     config.ConfigManager().XchangeConnStr, config.ConfigManager(
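# For reference, a minimal pymongo sketch of the `$set` update that
# custom.update_data_to_Db_noupsert performs above -- an assumption based on
# the arguments passed in; `save_docid_count` is illustrative, not from this
# repo.
from pymongo import MongoClient

def save_docid_count(port, db_name, coll_name, config_id, docid_count):
    client = MongoClient('localhost', port)
    # update the existing config document in place; upsert=False mirrors the
    # "noupsert" behaviour of the helper used here
    client[db_name][coll_name].update_one(
        {'_id': config_id}, {'$set': {'docid_count': docid_count}},
        upsert=False)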
seq = seq_file.read()
seq_file.close()
print(model_type)
# pick the results file matching the model that generated the sequence
if model_type == 'cnn':
    res = open('results/cnn_result.csv', 'a')
elif model_type == 'random':
    res = open('results/random_result.csv', 'a')
elif model_type == 'original':
    res = open('results/original_result.csv', 'a')
else:
    print('lstm')
    res = open('results/lstm_result.csv', 'a')

gc = gc_content(seq)
cpg = cpg_oeratio(seq)
hits, tot = motif_match(string_to_array(seq))

# reference nucleosome energies for the original s.cerevisiae sequence
pos_org, en_org = np.loadtxt(
    'results/original/s.cerevisiae/s.cerevisiae.fa.nuc',
    unpack=True, skiprows=1)
datafile = sys.argv[4]
pos, en = np.loadtxt(datafile, unpack=True, skiprows=1)
length = len(en)
assert np.alltrue(pos[:length] == pos_org[:length])
# mean absolute deviation from the reference energy profile
abst = np.sum(abs(en[:length] - en_org[:length])) / length
# pos = pos[:10000]
# en = en[:10000]
spl = UnivariateSpline(pos, en)
spl.set_smoothing_factor(150000)
plt.rc('text', usetex=True)
plt.rc('font', family='serif')
fig = plt.figure()
plt.gcf().subplots_adjust(bottom=0.13)
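# The real gc_content/cpg_oeratio called above are defined elsewhere in this
# script; these bodies are a sketch based on the standard definitions only.
def gc_content(seq):
    # fraction of G and C bases in the sequence
    seq = seq.upper()
    return (seq.count('G') + seq.count('C')) / len(seq)

def cpg_oeratio(seq):
    # observed/expected CpG ratio: count(CG) * N / (count(C) * count(G))
    seq = seq.upper()
    return seq.count('CG') * len(seq) / (seq.count('C') * seq.count('G'))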
            strtimestamp += ' ' + str(datetime.datetime.now())
            if filepath[-4:].lower() == ".doc":
                docreadcount += 1
            print('Total doc read count:' + str(docreadcount))
            print('File : ' + str(file_count) + ' ' + strtimestamp)
            print('Antiword empty count:' + str(antiwordemptycount))
        except BaseException as ex:
            exception_message = '\n' + 'Exception:' + \
                str(datetime.datetime.now()) + '\n'
            exception_message += 'File: ' + '\n'
            exception_message += '\n' + str(ex) + '\n'
            exception_message += '-' * 100  # .encode('utf8'))
            utility.write_to_file(config.ConfigManager().LogFile, 'a',
                                  exception_message)
    utility.write_to_file(
        config.ConfigManager().LogFile, 'a',
        'Number of resumes read - ' + str(file_count) + ' ' +
        str(datetime.datetime.now()))


if __name__ == "__main__":
    file_paths = []
    directory_list = []
    directory_list = utility.string_to_array(
        config.ConfigManager().ResumeDirectory, ',', directory_list)
    file_paths = filemanager.directory_iterate(directory_list)
    route_dataread(file_paths)
    utility.archive_content(file_paths,
                            config.ConfigManager().ArchiveDirectory)
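# For context, a minimal sketch of .doc text extraction with antiword, which
# the docreadcount/antiwordemptycount counters above suggest this script
# relies on -- assuming the antiword binary is installed; `extract_doc_text`
# is a hypothetical helper, not from this repo.
import subprocess

def extract_doc_text(filepath):
    # antiword writes the document's plain text to stdout; an empty result is
    # what a counter like antiwordemptycount would record
    result = subprocess.run(['antiword', filepath],
                            capture_output=True, text=True)
    return result.stdout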