Example no. 1 (score: 0)
            # Write the decompressed payload to the configured rates folder,
            # keeping the original file name (binary mode).
            # NOTE(review): not wrapped in `with`; the handle leaks if
            # write() raises, though the except below still logs the error.
            uncompfile = open(
                config.ConfigManager().PCRatesFileFolder + '/' + filename,
                'wb')
            uncompfile.write(gzipdata)
            uncompfile.close()

        # Best-effort error handling: log the failure together with the
        # offending file path instead of propagating it.
        except BaseException as ex:
            utility.log_exception_file_and_filepath(
                ex,
                config.ConfigManager().PromptcloudLogFile, filepath)
        # Delete the compressed source file whether or not the unzip
        # succeeded, so it is not picked up again on the next run.
        os.remove(filepath)


if __name__ == "__main__":
    # Record a start-of-run marker in the promptcloud log.
    utility.write_to_file(
        config.ConfigManager().PromptcloudLogFile, 'a',
        'promptcloud rates file unzip running' + ' ' +
        str(datetime.datetime.now()))

    # Expand the comma-separated download folders from configuration, then
    # collect every file path found beneath them.
    download_dirs = utility.string_to_array(
        config.ConfigManager().PCRatesCompFolder, ',', [])
    compressed_paths = filemanager.directory_iterate(download_dirs)

    # Unzip each downloaded rates file (files are removed after reading so
    # they are not processed twice).
    route_compfileread(compressed_paths)
    # Archiving is currently disabled:
    # utility.archive_content(
    #     compressed_paths, config.ConfigManager().ArchiveDirectory)
Example no. 2 (score: 0)
    # Dump the analysis counters collected above.
    # NOTE(review): all of these names are defined earlier in the file
    # (outside this excerpt) — presumably record counts bucketed by
    # description quality plus per-jobsite totals; confirm against the
    # surrounding function.
    print(incompletedesc)
    print(smalldesc)
    print(nonedesc)
    print(nodesc)
    print(jobsitedict)
    print(totalrecords)
    print(totaljobsdict)


if __name__ == "__main__":
    # Record a start-of-run marker in the promptcloud log.
    utility.write_to_file(
        config.ConfigManager().PromptcloudLogFile, 'a',
        'crawl data analysis running' + ' ' + str(datetime.datetime.now()))

    # Gather every file beneath the comma-separated crawl folders.
    crawl_dirs = utility.string_to_array(
        config.ConfigManager().PCFileFolder, ',', [])
    crawl_files = filemanager.directory_iterate(crawl_dirs)

    # Analyze the crawled xml, then capture the valid records.
    analyze_data(crawl_files)
    valid_records()

    # Copy the analysis file to the shared mount and delete the local copy.
    # HACK: pipes a hard-coded password to sudo through a shell string —
    # a security liability; should be replaced with subprocess.run and
    # proper privileges.
    os.system("echo 'b' | sudo -S cp " + analysis_file + " /mnt/nlpdata")
    os.system("rm " + analysis_file)
    # Archiving is currently disabled:
    # utility.archive_content(
    #     crawl_files, config.ConfigManager().ArchiveDirectory)
Example no. 3 (score: 0)
    # Persist the incremented read counter so the next execution can resume
    # the sequence. NOTE(review): data_read_count, configdocs, docid_count
    # and `connection` are defined outside this excerpt.
    data_read_count += 1
    utility.write_to_file(config.ConfigManager(
    ).ExecutioncountFile, 'w', str(data_read_count))
    # Update the stored docid_count on the config document matched by _id
    # (no upsert: the document must already exist).
    dictionaries.UpdateTemplateWhere['_id'] = configdocs[0]['_id']
    dictionaries.UpdateTemplateSet['docid_count'] = docid_count
    dictionaries.DBSet['$set'] = dictionaries.UpdateTemplateSet
    custom.update_data_to_Db_noupsert(int(config.ConfigManager().MongoDBPort), config.ConfigManager().DataCollectionDB, config.ConfigManager(
    ).ConfigCollection, dictionaries.UpdateTemplateWhere, dictionaries.DBSet, connection)


if __name__ == "__main__":

    # Expand the configured comma-separated directory list, collect every
    # file path beneath those directories, and route each file to the
    # appropriate reader.
    file_paths = []
    directory_list = []
    directory_list = utility.string_to_array(
        config.ConfigManager().DirectoryList, ',', directory_list)
    file_paths = filemanager.directory_iterate(directory_list)
    route_dataread(file_paths)
    # NOTE(review): archiving and the DB-driven reads below are disabled
    # (commented out) — confirm intent before re-enabling any of them.
    # utility.archive_content(
    # file_paths, config.ConfigManager().ArchiveDirectory)
    #connection = dbmanager.mongoDB_connection(int(config.ConfigManager().MongoDBPort))
    # configdocs = custom.retrieve_data_from_DB(int(config.ConfigManager(
    #).MongoDBPort), config.ConfigManager().DataCollectionDB, config.ConfigManager().ConfigCollection)
    #docid_count = int(configdocs[0]['docid_count'])
    # docid_count = custom.data_from_DB(config.ConfigManager().STConnStr, config.ConfigManager(
    #).STJobQueryId, config.ConfigManager().JobDetails, config.ConfigManager().ST, docid_count)
    # docid_count = custom.data_from_DB(config.ConfigManager().STConnStr, config.ConfigManager(
    #).STCandidateQueryId, config.ConfigManager().CandidateDetails, config.ConfigManager().ST, docid_count)
    # docid_count = custom.data_from_DB(config.ConfigManager().XchangeConnStr, config.ConfigManager(
    #).XchangeJobQueryId, config.ConfigManager().JobDetails, config.ConfigManager().Xchange, docid_count)
    # docid_count = custom.data_from_DB(config.ConfigManager().XchangeConnStr, config.ConfigManager(
Example no. 4 (score: 0)
# Read the whole sequence and release the handle.
# NOTE(review): seq_file is opened earlier in the file (outside this excerpt).
seq = seq_file.read()
seq_file.close()
print(model_type)
# Open the per-model results CSV in append mode; any model_type other than
# the three known ones falls through to the lstm results file.
# NOTE(review): `res` is left open here — presumably written and closed by
# code after this excerpt; confirm.
if model_type == 'cnn':
    res = open('results/cnn_result.csv','a')
elif model_type == 'random':
    res = open('results/random_result.csv','a')
elif model_type == 'original':
    res = open('results/original_result.csv','a')
else:
    print('lstm')
    res = open('results/lstm_result.csv','a')

# Sequence statistics: GC fraction, CpG observed/expected ratio, and motif
# hits over the encoded sequence (helpers defined outside this excerpt).
gc = gc_content(seq)
cpg = cpg_oeratio(seq)
hits, tot = motif_match(string_to_array(seq))

# Reference (position, energy) track for s.cerevisiae, and the model output
# file passed as the fourth CLI argument. Both files have one header row.
pos_org , en_org = np.loadtxt('results/original/s.cerevisiae/s.cerevisiae.fa.nuc', unpack = True, skiprows = 1)
datafile = sys.argv[4]
pos, en = np.loadtxt(datafile, unpack = True, skiprows = 1)
length = len(en) 
# The two tracks must agree on positions before energies are compared.
# NOTE(review): assert is stripped under `python -O`.
assert np.alltrue(pos[:length]==pos_org[:length])
# Mean absolute deviation between predicted and reference energies.
abst = np.sum(abs(en[:length]-en_org[:length]))/length
#pos = pos[:10000]
#en = en[:10000]
# Smooth the energy profile for plotting; the large smoothing factor was
# presumably tuned by eye — confirm before changing.
spl = UnivariateSpline(pos, en)
spl.set_smoothing_factor(150000)
# LaTeX text rendering with a serif font for the figure below.
plt.rc('text', usetex=True)
plt.rc('font', family='serif')
fig = plt.figure()
plt.gcf().subplots_adjust(bottom=0.13)
Example no. 5 (score: 0)
                # Timestamp each processed file; .doc files are counted
                # separately (they are handled via antiword elsewhere in
                # this file).
                strtimestamp += ' ' + str(datetime.datetime.now())
                if filepath[-4:].lower() == ".doc":
                    docreadcount += 1
                    print('Total doc read count:' + str(docreadcount))
            print('File : ' + str(file_count) + ' ' + strtimestamp)
            print('Antiword empty count:' + str(antiwordemptycount))
        # Log any per-file failure to the configured log and keep going
        # with the next file.
        except BaseException as ex:
            exception_message = '\n' + 'Exception:' + \
                str(datetime.datetime.now()) + '\n'
            exception_message += 'File: ' + '\n'
            exception_message += '\n' + str(ex) + '\n'
            exception_message += '-' * 100
            # .encode('utf8'))
            utility.write_to_file(config.ConfigManager().LogFile, 'a',
                                  exception_message)
    # Record the total number of resumes processed in this run.
    utility.write_to_file(
        config.ConfigManager().LogFile, 'a', 'Number of resumes read - ' +
        str(file_count) + ' ' + str(datetime.datetime.now()))


if __name__ == "__main__":

    # Walk every configured resume directory, hand each discovered file to
    # the reader, then archive the processed files so they are not read
    # again on the next run.
    resume_dirs = utility.string_to_array(
        config.ConfigManager().ResumeDirectory, ',', [])
    resume_files = filemanager.directory_iterate(resume_dirs)
    route_dataread(resume_files)
    utility.archive_content(resume_files,
                            config.ConfigManager().ArchiveDirectory)