Example #1
def route_compfileread(filepaths):
    for filepath in filepaths:
        # getting complete file name of the .gz file (computed before the try
        # block so the except handler can reference it safely)
        compfilename = utility.filename_from_filepath(filepath)
        try:
            # extracting data from .gz file.
            gzipfile = gzip.GzipFile(filepath, 'rb')
            gzipdata = gzipfile.read()
            gzipfile.close()

            # extracting the original file name
            filename = compfilename.split('.gz')[0]
            print(filename)

            # creating file and writing data
            uncompfile = open(
                config.ConfigManager().PCFileFolder + '/' + filename, 'wb')
            uncompfile.write(gzipdata)
            uncompfile.close()

        except BaseException as ex:
            utility.log_exception_with_filepath(ex, filepath)
            # recording the names of files that cannot be extracted with gzip
            utility.write_to_file(
                config.ConfigManager().PCDataAnalysisResultsFile, 'a',
                compfilename + '  cannot be extracted')
        os.remove(filepath)
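A minimal caller sketch (the glob pattern and folder are assumptions for illustration; the original call site is not shown here):

import glob

# gather compressed crawl files and extract them in one pass
gzpaths = glob.glob('/mnt/nlpdata/PCCompData/*.gz')  # hypothetical location
route_compfileread(gzpaths)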
Example #2
def automate_processes():
    utility.write_to_file(config.ConfigManager().PromptcloudLogFile, 'a',
                          'PromptCloudautomationscript running')
    try:
        # download files into PCCompData within mnt/nlpdata (xml format)
        exec(open('pc_download_crawldata_threading.py').read(), globals())
        # compress the PCCompdata folder
        exec(open('compress.py').read(), globals())
        # unzip files into the PCData folder; the timestamp is stored in dataloadconfig
        exec(open('pc_unzip_gz.py').read(), globals())
        # write analysis results into pcdataanalysisresults.ods
        exec(open('analyze_crawldata.py').read(), globals())
        # for automatically sending emails
        # exec(open('mailsend.py').read(), globals())
        # store analysis file in s3 backup
        exec(open('pcdataanalysisbackup.py').read(), globals())
    except BaseException as ex:
        exception_message = '\n' + 'Exception:' + \
            str(datetime.datetime.now()) + '\n'
        exception_message += 'File: ' + '\n'
        exception_message += '\n' + str(ex) + '\n'
        exception_message += '-' * 100
        utility.write_to_file(config.ConfigManager().PromptcloudLogFile, 'a',
                              exception_message)
Example #3
def automate_processes():
    utility.write_to_file(config.ConfigManager().LogFile, 'a',
                          ' master automationscript running')
    try:
        utility.update_config_coll_process_started_date()
        # Supplier master list load
        exec(open('st_master_supplier_data_read.py').read(), globals())
        # Client master list load
        exec(open('stclientsdataread.py').read(), globals())
        # Currency master list load
        exec(open('currencydataread.py').read(), globals())
        # Industry master list load
        exec(open('industrydataread.py').read(), globals())
        # MSP master list load
        exec(open('stmspdataread.py').read(), globals())
        # Rates information transfer from Smart Track
        exec(open('stratesdataread.py').read(), globals())
        # PromptCloud data load automation
        exec(open('prompt_cloud_automation.py').read(), globals())
        # Transferring files from staging collection to masters collection
        exec(open('staging_data_read.py').read(), globals())
        # Generating master integer graph
        exec(open('gen_docintgraph_from_db.py').read(), globals())
        # Transferring file to webserver
        exec(open('master_int_graph_transfer.py').read(), globals())
        # Learning automation
        exec(open('knowledge_build_automation.py').read(), globals())
    except BaseException as ex:
        utility.log_exception_file(config.ConfigManager().LogFile, ex)
Example #4
def analyze_data(filepaths):
    global totalrecords
    global invalidrecords
    global emptydesc
    global incompletedesc
    global smalldesc
    global nonedesc
    global nodesc
    global totaljobsdict
    global jobsitedict
    filecount = 0
    dbrecordcount = 0
    # looping through file paths
    for filepath in filepaths:
        filecount += 1
        print(filepath)
        print('Processing file number: ' + str(filecount))

        # getting xml tree from file
        tree = datareadfiletypes.read_xml_tree(filepath)
        # drilling xml to get the job info tag contents
        if config.ConfigManager().PromptCloudRecordLimitSet == "Yes":
            if dbrecordcount < int(
                    config.ConfigManager().PromptCloudRecordLimit):
                for page in tree.getroot().findall('page'):
                    # dbrecordcount = job_info_analysis(page, filepath, dbrecordcount)
                    page_dict_object = utility.xml_to_dict(ET.tostring(page))
                    dbrecordcount = pc_rates_data_storage(
                        page_dict_object, filepath, dbrecordcount)
        print(str(datetime.datetime.now()))
        os.remove(filepath)
Example #5
def insert_to_db(dict_object_record_list):
    # dummy collection PCRatesDataColl
    global connection
    custom.insert_data_to_DB_dBCollection(
        dict_object_record_list,
        config.ConfigManager().stagingCollection, connection,
        config.ConfigManager().RatesDB)
Example #6
def nounphrase_generate():
    c = MongoClient(dcrconfig.ConfigManager().Datadb)
    db = c[config.ConfigManager().IntelligenceDb]
    col = db[config.ConfigManager().IntelligenceDataCollection]
    docs = col.find({'nounPhrases': ""}, {
        "description": 1,
        "doc_id": 1,
        "_id": 1
    })

    mongoport = int(config.ConfigManager().MongoDBPort)
    connection = dbmanager.mongoDB_connection(mongoport)

    for doc in docs:
        try:
            data = {}
            data['desc'] = doc['description']
            data['_id'] = doc['_id']
            data['doc_id'] = doc['doc_id']
            data['connection'] = connection
            q.put(data)

        except BaseException as ex:
            exception_message = '\n' + 'Exception:' + \
                str(datetime.datetime.now()) + '\n'
            exception_message += 'File: ' + '\n'
            exception_message += '\n' + str(ex) + '\n'
            exception_message += '-' * 100
            utility.write_to_file(
                dcrconfig.ConfigManager().SemanticGraphLogFile, 'a',
                exception_message)
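The function only enqueues work; a minimal consumer sketch, assuming q is a queue.Queue drained by worker threads defined elsewhere in the module (process_document is a hypothetical handler):

def worker():
    while True:
        data = q.get()
        try:
            process_document(data)  # hypothetical per-document handler
        finally:
            q.task_done()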
Example #7
def route_compfileread(filepaths):
    for filepath in filepaths:
        try:
            # extracting data from .gz file.
            gzipfile = gzip.GzipFile(filepath, 'rb')
            gzipdata = gzipfile.read()
            gzipfile.close()

            # getting complete file name of the .gz file
            compfilename = utility.filename_from_filepath(filepath)
            # extracting the original file name
            filename = compfilename.split('.gz')[0]
            print(filename)

            # creating file and writing data
            uncompfile = open(
                config.ConfigManager().PCRatesFileFolder + '/' + filename,
                'wb')
            uncompfile.write(gzipdata)
            uncompfile.close()

        except BaseException as ex:
            utility.log_exception_file_and_filepath(
                ex,
                config.ConfigManager().PromptcloudLogFile, filepath)
        os.remove(filepath)
Example #8
def valid_records():
    global totaljobsdict
    global jobsitedict

    # subtracting dictionary key values to get valid records per site
    validjobsdict = {key: totaljobsdict[key] - jobsitedict.get(key, 0)
                     for key in totaljobsdict.keys()}
    utility.write_to_file(config.ConfigManager().PCDataAnalysisResultsFile,
                          'a', 'Total valid records per site: ')
    utility.write_to_file(config.ConfigManager().PCDataAnalysisResultsFile,
                          'a', str(validjobsdict))
Example #9
def update_DB(configdocs, latestdate):
    connection = dbmanager.mongoDB_connection(
        int(config.ConfigManager().MongoDBPort))
    dictionaries.UpdateTemplateSet = {}
    dictionaries.UpdateTemplateWhere = {}
    dictionaries.UpdateTemplateSet['PClastDate'] = latestdate
    dictionaries.UpdateTemplateWhere['_id'] = configdocs[0]['_id']
    dictionaries.DBSet['$set'] = dictionaries.UpdateTemplateSet
    custom.update_data_to_Db_noupsert(int(config.ConfigManager().MongoDBPort),
                                      config.ConfigManager().DataCollectionDB,
                                      config.ConfigManager().ConfigCollection,
                                      dictionaries.UpdateTemplateWhere,
                                      dictionaries.DBSet, connection)
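Assuming custom.update_data_to_Db_noupsert wraps a plain MongoDB update, the call is roughly equivalent to this pymongo sketch (not the wrapper's actual body; client is an assumed MongoClient):

db = client[config.ConfigManager().DataCollectionDB]
db[config.ConfigManager().ConfigCollection].update_one(
    {'_id': configdocs[0]['_id']},
    {'$set': {'PClastDate': latestdate}},
    upsert=False)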
Example #10
def nounphrase_generate():
    docs = custom.retrieve_rowdata_from_DB(
        int(config.ConfigManager().MongoDBPort),
        config.ConfigManager().DataCollectionDB,
        config.ConfigManager().DataCollectionDBCollection,
        dictionaries.DBWhereConditon)
    connection = dbmanager.mongoDB_connection(
        int(config.ConfigManager().MongoDBPort))
    description = ''
    for doc in docs:
        try:
            description = doc['description']
            noun_phrases = dcrnlp.extract_nounphrases_sentences(description)
            dictionaries.UpdateTemplateSet['nounPhrases'] = noun_phrases
            dictionaries.UpdateTemplateWhere['_id'] = doc['_id']
            dictionaries.DBSet['$set'] = dictionaries.UpdateTemplateSet
            custom.update_data_to_Db_con(
                int(config.ConfigManager().MongoDBPort),
                config.ConfigManager().DataCollectionDB,
                config.ConfigManager().DataCollectionDBCollection,
                dictionaries.UpdateTemplateWhere, dictionaries.DBSet,
                connection)
        except BaseException as ex:
            exception_message = '\n' + 'Exception:' + \
                str(datetime.datetime.now()) + '\n'
            exception_message += 'File: ' + '\n'
            exception_message += '\n' + str(ex) + '\n'
            exception_message += '-' * 100
            utility.write_to_file(config.ConfigManager().LogFile, 'a',
                                  exception_message)
Example #11
def automate_processes():
    utility.write_to_file(config.ConfigManager().LogFile, 'a',
                          'stautomationscript running')
    try:
        # Reading requirement and candidate data from ST
        exec(open('stdataread.py').read(), globals())
        # Extracting candidate resumes
        exec(open('resume_extract.py').read(), globals())
        # Read extracted resumes and update to 'resumeText' field
        exec(open('resumeread.py').read(), globals())
        # Appending 'resumeText' to description field
        exec(open('resume_append.py').read(), globals())
        # Generate nounphrases for candidate table
        exec(open('stnounphrase_generate.py').read(), globals())
        # Update requirements and rates for candidates
        exec(open('requirement_update_fastest.py').read(), globals())
        # Update candidate statuses which changed
        exec(open('submission_status_update.py').read(), globals())
        # Extracting requirement description files
        exec(open('req_desc_file_extract.py').read(), globals())
        # Read extracted description files and update to 'reqFileDesc' field
        exec(open('req_desc_file_read.py').read(), globals())
        # Appending 'reqFileDesc' to description field
        exec(open('req_desc_file_append.py').read(), globals())
        # Generate nounPhrases for requirement tables
        exec(open('streqnounphrase_generate.py').read(), globals())
        # Get supplier info
        exec(open('stsupplierdataread.py').read(), globals())
        # Candidate resume screening
        exec(open('contactinfodetect.py').read(), globals())
        # Client master list load
        # exec(open('stclientsdataread.py').read(), globals())
        # # Currency master list load
        # exec(open('currencydataread.py').read(), globals())
        # # Industry master list load
        # exec(open('industrydataread.py').read(), globals())
        # # MSP master list load
        # exec(open('stmspdataread.py').read(), globals())
        # currency code update
        exec(open('stcandidateCurrency_update_fastest.py').read(), globals())
    except BaseException as ex:
        exception_message = '\n' + 'Exception:' + \
            str(datetime.datetime.now()) + '\n'
        exception_message += 'File: ' + '\n'
        exception_message += '\n' + str(ex) + '\n'
        exception_message += '-' * 100
        utility.write_to_file(config.ConfigManager().LogFile, 'a',
                              exception_message)
Example #12
def updateconfigcollection(docid, dateTime, whereID):
    connection = dbmanager.mongoDB_connection(
        int(config.ConfigManager().MongoDBPort))
    UpdateTemplateWhere = utility.clean_dict()
    UpdateTemplateSet = utility.clean_dict()
    UpdateTemplateWhere['_id'] = whereID
    UpdateTemplateSet['masterDocId'] = docid
    UpdateTemplateSet['stagingDateModified'] = dateTime
    DBSet = utility.clean_dict()
    DBSet['$set'] = UpdateTemplateSet
    custom.update_data_to_Db_noupsert(
        int(config.ConfigManager().MongoDBPort),
        config.ConfigManager().RatesDB,
        config.ConfigManager().RatesConfigCollection, UpdateTemplateWhere,
        DBSet, connection)
Example #13
    def send_config(self):
        mode, ok = QInputDialog.getItem(self, "Select config sending mode",
                                        "Mode:", ("Modify", "Rewrite"), 0,
                                        False)
        if not ok or not mode:
            return

        path = QFileDialog.getOpenFileName(
            self,
            "Select configuration file",
            filter="Configs (*.ini *.txt *.cfg)")[0]
        if not path:
            return

        config = cfg.ConfigManager()
        config.load_only_config(path)
        data = config.full_dict(include_defaults=False)
        logging.info(f"Loaded config from {path}")

        copters = self.model.user_selected()
        for copter in copters:
            copter.client.send_message("config",
                                       kwargs={
                                           "config": data,
                                           "mode": mode.lower()
                                       })
Example #14
def pc_rates_data_storage(page_dict_object, filepath, dbrecordcount):
    global totalrecords
    global invalidrecords
    global emptydesc
    global incompletedesc
    global smalldesc
    global nonedesc
    global nodesc
    global totaljobsdict
    global jobsitedict
    dict_object_record_list = []
    try:
        page_object_list = page_dict_object['page']
        if isinstance(page_object_list['record'], list):
            for record_object in page_object_list['record']:
                record_object = pc_rates_add_fields(record_object, filepath)
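                # size guard: 13 MB keeps a record under MongoDB's 16 MB BSON
                # document limit (assumed rationale for the threshold)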
                if sys.getsizeof(record_object['description']) < 13000000:
                    dict_object_record_list.append(record_object)
                dbrecordcount += 1
        else:
            record_object = page_object_list['record']
            record_object = pc_rates_add_fields(record_object, filepath)

            if sys.getsizeof(record_object['description']) < 13000000:
                dict_object_record_list.append(record_object)
            dbrecordcount += 1
    except BaseException as ex:
        utility.log_exception_file(ex,
                                   config.ConfigManager().PromptcloudLogFile)
    if dict_object_record_list:
        insert_to_db(dict_object_record_list)
    # updating doc_id in config table

    return dbrecordcount
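For reference, an illustrative shape of the dict this function expects from utility.xml_to_dict (field values are hypothetical):

page_dict_object = {
    'page': {
        'record': [  # a single dict instead of a list is also handled
            {'description': '...', 'job_description': '...'},
        ]
    }
}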
Example #15
def main():
    parser = argparse.ArgumentParser(description="""
        Service to calculate statistics
    """)
    parser.add_argument('--config', help='configuration file', default=None)
    args = parser.parse_args()
    confs = config.ConfigManager()
    if args.config is not None:
        with open(args.config, "r") as conffile:
            confs.load_from_file(conffile)
    logging.basicConfig(level=getattr(logging, confs["LogLevel"].upper()))
    address = confs["address"]
    logging.info("Starting grpc server with address :{}".format(address))
    logging.info("Starting grpc server {} workers".format(confs["workers"]))
    server = grpc.server(
        futures.ThreadPoolExecutor(max_workers=confs["workers"]))
    objs = grpc.insecure_channel(confs["object_service"]["url"])
    data = grpc.insecure_channel(confs["data_service"]["url"])
    stats_pb2_grpc.add_StatsServiceServicer_to_server(
        StatsServiceServ(data, objs), server)
    server.add_insecure_port(address)
    server.start()
    try:
        while True:
            time.sleep(10)
    except KeyboardInterrupt:
        logging.info("Stop signal got")
        server.stop(0)
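On grpcio 1.24 and later, the sleep-based shutdown loop can be replaced by the server's built-in blocking call; a minimal sketch of the same main() tail:

    server.start()
    try:
        server.wait_for_termination()
    except KeyboardInterrupt:
        logging.info("Stop signal got")
        server.stop(0)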
Example #16
def modifygeodata():
    ratesData = mastercoll.find({})
    for row in ratesData:
        try:
            if row['cityLocationFlag'] == 1:
                cityGeoLocation = []
                cityGeoLocation.append(float(row['cityLongitude']))
                cityGeoLocation.append(float(row['cityLatitude']))
                row['coordinates'] = cityGeoLocation
                mastercoll.update({"doc_id": row['doc_id']},
                                  {"$set": {
                                      "coordinates": cityGeoLocation
                                  }})


            # if row['stateLocationFlag'] == 1:
            #     stateGeoLocation = []
            #     stateGeoLocation.append(float(row['stateLatitude']))
            #     stateGeoLocation.append(float(row['stateLongitude']))
            #     row['stateGeoLocation'] = stateGeoLocation
            #     # print(row,"\n")
            #     mastercoll.update({"doc_id": row['doc_id']},
            #                       {"$set": {"stateGeoLocation": stateGeoLocation}})

        except BaseException as ex:
            utility.log_exception_file(ex, config.ConfigManager().LogFile)
Example #17
def main():
    parser = argparse.ArgumentParser(description="""
        Service to store objects
    """)
    parser.add_argument('--config', help='configuration file', default=None)
    args = parser.parse_args()
    confs = config.ConfigManager()
    if args.config is not None:
        with open(args.config, "r") as conffile:
            confs.load_from_file(conffile)
    logging.basicConfig(level=getattr(logging, confs["LogLevel"].upper()))
    address = confs["address"]
    logging.info("Starting grpc server with address :{}".format(address))
    logging.info("Starting grpc server {} workers".format(confs["workers"]))
    server = grpc.server(
        futures.ThreadPoolExecutor(max_workers=confs["workers"]))
    dbconf = confs["database"]
    logging.info("Connecting to {} with username: {}, host: {}".format(
        dbconf["database"], dbconf["username"], dbconf["host"]))
    database = psycopg2.connect(dbname=dbconf["database"],
                                user=dbconf["username"],
                                password=dbconf["password"],
                                host=dbconf["host"])
    objects_pb2_grpc.add_ObjectServiceServicer_to_server(
        ObjectServiceServ(database), server)
    server.add_insecure_port(address)
    server.start()
    try:
        while True:
            time.sleep(10)
    except KeyboardInterrupt:
        logging.info("Stop signal got")
        server.stop(0)
Example #18
def main():
    parser = argparse.ArgumentParser(description="""
        Service to store and process data companies
    """)
    parser.add_argument('--config', help='configuration file', default=None)
    args = parser.parse_args()
    confs = config.ConfigManager()
    if args.config is not None:
        with open(args.config, "r") as conffile:
            confs.load_from_file(conffile)
    logging.basicConfig(level=getattr(logging, confs["LogLevel"].upper()))
    address = confs["address"]
    logging.info("Starting grpc server with address :{}".format(address))
    logging.info("Starting grpc server {} workers".format(confs["workers"]))
    server = grpc.server(
        futures.ThreadPoolExecutor(max_workers=confs["workers"]))
    # TODO: decide which approach is better: this one or the "depend" style
    mgocli = MongoClient(confs["database"]["url"])
    database = UserDb(mgocli["user_database"])
    users_pb2_grpc.add_UserInfoServiceServicer_to_server(
        UsersServiceServ(database), server)
    server.add_insecure_port(address)
    server.start()
    try:
        while True:
            time.sleep(10)
    except KeyboardInterrupt:
        logging.info("Stop signal got")
        server.stop(0)
Example #19
def main():
    parser = argparse.ArgumentParser(description="""
        Service to store data
    """)
    parser.add_argument('--config', help='configuration file', default=None)
    args = parser.parse_args()
    confs = config.ConfigManager()
    if args.config is not None:
        with open(args.config, "r") as conffile:
            confs.load_from_file(conffile)
    logging.basicConfig(level=getattr(logging, confs["LogLevel"].upper()))
    address = confs["address"]
    objs = grpc.insecure_channel(confs["objs"])
    logging.info("Starting grpc server with address :{}".format(address))
    logging.info("Starting grpc server {} workers".format(confs["workers"]))
    mgocli = MongoClient(confs["database"]["url"])
    server = grpc.server(
        futures.ThreadPoolExecutor(max_workers=confs["workers"]))
    data_pb2_grpc.add_DataServiceServicer_to_server(
        DataServiceServ(SensorDataModel(mgocli), objs), server)
    server.add_insecure_port(address)
    server.start()
    run_consumer(mgocli, confs["rabbit"], objs)
    try:
        while True:
            time.sleep(10)
    except KeyboardInterrupt:
        logging.info("Stop signal got")
        server.stop(0)
Example #20
def control_loop():
    """
    Main program loop. Refers to state of files in target directory at regular (five
    second) intervals and stores any changes. 

    Arguments:
        dir_path (str): path of target directory
    """
    configure = config.ConfigManager()
    dir_path = configure.get_target_path()
    temp_path = configure.get_temp_path()
    interval = configure.get_interval()

    try:
        manager = manage.FileManager(dir_path, temp_path)
    except manage.InvalidDirectoryError:
        return

    while True:
        time.sleep(interval)
        interval = configure.get_interval()
        active = configure.get_active()
        if dir_path != configure.get_target_path():
            dir_path = configure.get_target_path()
            manager.set_target_directory(dir_path)
            print(f"Change to {dir_path}")
        if not active or not manager.has_changed():
            print(f"No changes (Active: {active})")
            continue
        changes = manager.store_changes()
        print(changes)
Example #21
def automate_processes():
    utility.write_to_file(config.ConfigManager().LogFile, 'a',
                          'pcanalysisautomationscript running')
    try:
        exec(open('download_crawldata_threading.py').read(), globals())
        exec(open('unzip_gz.py').read(), globals())
        exec(open('analyze_crawldata.py').read(), globals())
    except BaseException as ex:
        exception_message = '\n' + 'Exception:' + \
            str(datetime.datetime.now()) + '\n'
        exception_message += 'File: ' + '\n'
        exception_message += '\n' + str(ex) + '\n'
        exception_message += '-' * 100
        utility.write_to_file(config.ConfigManager().LogFile, 'a',
                              exception_message)
Example #22
def readstagingdata():
    utility.write_to_file(
        config.ConfigManager().LogFile, 'a',
        'Staging dataread running' + ' ' + str(datetime.datetime.now()))
    ratesConfigValues = ratesConfig.find({})
    ratesDate = ratesConfigValues[0]['stagingDateModified']
    ratesData = stagingcoll.find({'dateModified': {"$gt": ratesDate}},
                                 no_cursor_timeout=True)
    doc_id = ratesConfigValues[0]['masterDocId']
    objectid = ratesConfigValues[0]['_id']
    dateModifiedList = []
    geoCountryQuery = "select distinct name,iso_alpha3, fips_code from geo_country order by name"
    geoStateQuery = "select ga1.name, gn.admin1, gn.latitude, gn.longitude from geo_admin1 ga1 inner join geo_name gn on ga1.geonameid = gn.geonameid"
    geoCityQuery = "select distinct sPlaceName, fLatitude, fLongitude from GeoPostal order by sPlaceName"
    geoZipCodeQuery = "select distinct sPostalCode, fLatitude, fLongitude from GeoPostal  order by sPostalCode"
    countryDictList = custom.create_sql_dict_list(
        geoCountryQuery,
        config.ConfigManager().geographicalDataConnstr)
    stateDictList = custom.create_sql_dict_list(
        geoStateQuery,
        config.ConfigManager().geographicalDataConnstr)
    cityDictList = custom.create_sql_dict_list(
        geoCityQuery,
        config.ConfigManager().geographicalDataConnstr)
    zipCodeDictList = custom.create_sql_dict_list(
        geoZipCodeQuery,
        config.ConfigManager().geographicalDataConnstr)
    i = 0
    for row in ratesData:
        dateModifiedList.append(row['dateModified'])
        i += 1
        del row['_id']
        doc_id += 1
        row['doc_id'] = doc_id
        row['stagingDateModified'] = max(dateModifiedList)
        row['i'] = i
        row['objectid'] = objectid
        row['countryDictList'] = countryDictList
        row['stateDictList'] = stateDictList
        row['cityDictList'] = cityDictList
        row['zipCodeDictList'] = zipCodeDictList
        q.put(row)

    ratesData.close()
    del ratesData
Example #23
def process_staging_row(row):
    try:
        global dataList
        # Step 1: data scrubbing for email, phone, url and candidate name
        row = dataclean(row)

        # Step 2: noun phrase generation
        row = generatenounphrases(row)

        # Step 3: signature generation
        row = signaturegraph(row)

        # Step 4: rates calculation
        row = rates_calculation.billratescalculation(row)

        # Step 5: verification of rate availability
        # (rate value calculation must come before this check)
        row = rate_available(row)

        # geographical data check and additions
        row = custom.geo_data_check(row, row['countryDictList'], 'country')
        row = custom.geo_data_check(row, row['stateDictList'], 'state')
        row = custom.geo_data_check(row, row['cityDictList'], 'city')
        row = custom.geo_data_check(row, row['zipCodeDictList'], 'zipCode')
        del row['countryDictList']
        del row['stateDictList']
        del row['cityDictList']
        del row['zipCodeDictList']

        dataList.append(row)
        if row['i'] % int(
                config.ConfigManager().StagingMasterTransferStep) == 0:
            stagingDateModified = row['stagingDateModified']
            del row['stagingDateModified']
            objectid = row['objectid']
            del row['objectid']
            del row['i']
            # Step 6: insert data to db
            mastercoll.insert(dataList)
            dataList = []
            docid = row['doc_id']

            # Step 7: update config collection with doc_id and datetime
            updateconfigcollection(docid, stagingDateModified, objectid)

    except BaseException as ex:
        utility.log_exception_file(ex, config.ConfigManager().LogFile)
Example #24
def write_fileinfo(filepath, dict_object):
    filename = filepath.replace(config.ConfigManager().PCFileFolder + '/', '')
    sublist = [filename, (dict_object['record'])['uniq_id']]
    listdata_uniqueids.append(sublist)
Example #25
def generatenounphrases(row):
    # ST and PromptCloud records both carry a ready-made description field
    if row['source'] in (config.ConfigManager().ST,
                         config.ConfigManager().promptCloud):
        description = row['description']
        noun_phrases = dcrnlp.extract_nounphrases_sentences(description)
    else:
        desc = str(row['jobTitle']) + ' ' + str(
            row['jobDescription']) + ' ' + str(
                row['mandatorySkills']) + ' ' + str(row['desiredSkills'])
        row['description'] = desc
        noun_phrases = dcrnlp.extract_nounphrases_sentences(desc)
    row['nounPhrases'] = noun_phrases
    row['nounPhraseFlag'] = 1
    row['dateCreated'] = datetime.datetime.utcnow()
    row['dateModified'] = datetime.datetime.utcnow()
    return row
Example #26
def pc_rates_add_fields(record_object, filepath):
    record_object['dateCreated'] = datetime.datetime.utcnow()
    record_object['dateModified'] = datetime.datetime.utcnow()
    record_object['createdUser'] = '******'
    record_object['modifiedUser'] = '******'
    record_object['source'] = config.ConfigManager().promptCloud
    record_object['fileName'] = filepath.replace(
        config.ConfigManager().PCRatesFileFolder + '/', '')
    record_object['description'] = ''
    # if 'rate_value' in record_object and record_object['rate_value'] != '':
    #     record_object['maxBillRate'] = record_object['rate_value']

    if 'job_description' in record_object and record_object[
            'job_description'] != '':
        record_object['jobDescription'] = record_object['job_description']
        record_object['description'] = ''
        record_object['description'] += record_object['jobDescription']
        if 'jobtitle' in record_object and record_object['jobtitle'] != '':
            record_object['jobTitle'] = record_object['jobtitle']
            record_object['description'] += record_object['jobTitle']
    if 'jobdescription' in record_object and record_object[
            'jobdescription'] != '':
        record_object['jobDescription'] = record_object['jobdescription']
        record_object['description'] = ''
        record_object['description'] += record_object['jobDescription']
        if 'jobtitle' in record_object and record_object['jobtitle'] != '':
            record_object['jobTitle'] = record_object['jobtitle']
            record_object['description'] += record_object['jobTitle']
        if 'skills' in record_object and record_object['skills'] != '':
            record_object['description'] += record_object['skills']

    if 'postdate' in record_object and record_object['postdate'] != '':
        record_object['postDate'] = record_object['postdate']
    else:
        record_object['postDate'] = datetime.datetime.today().strftime(
            '%Y-%m-%d')

    if 'sitename' in record_object and record_object['sitename'] != '':
        record_object['dataSource'] = record_object['sitename']
    if 'site_name' in record_object and record_object['site_name'] != '':
        record_object['dataSource'] = record_object['site_name']
    return record_object
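An illustrative call showing the normalization this function performs (the record and filepath values are made up):

sample = {'jobdescription': 'Build data pipelines',
          'jobtitle': 'Engineer',
          'sitename': 'jobs.example.com'}
out = pc_rates_add_fields(sample, '/tmp/crawl.xml')  # hypothetical path
# out now carries jobDescription, jobTitle, dataSource, postDate (today),
# the audit fields, and a concatenated description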
Example #27
def job_info_analysis_storage(page_dict_object, filepath, dbrecordcount):
    global totalrecords
    global invalidrecords
    global emptydesc
    global incompletedesc
    global smalldesc
    global nonedesc
    global nodesc
    global totaljobsdict
    global jobsitedict

    dict_object_record_list = []
    try:
        dict_object = page_dict_object['page']
        # the outer if checks whether the jobdescription tag is present in the xml
        if 'jobdescription' in (dict_object['record']):
            # checking if job description is none
            if ((dict_object['record'])['jobdescription'] is not None):

                incorrectjobdescription = 0

                if (((dict_object['record'])['jobdescription']).strip()) == '':
                    incorrectjobdescription = 1

                if (len(((dict_object['record'])['jobdescription'])) < 20):
                    incorrectjobdescription = 1

                if (((dict_object['record'])['jobdescription']).strip()[-3:]
                    ) == '...':
                    incorrectjobdescription = 1

                if (incorrectjobdescription == 0):
                    (dict_object['record']
                     )['dateCreated'] = datetime.datetime.now()
                    (dict_object['record']
                     )['dateModified'] = datetime.datetime.now()
                    (dict_object['record'])['createdUser'] = '******'
                    (dict_object['record'])['modifiedUser'] = '******'
                    (dict_object['record'])['source'] = 'PromptCloud'
                    (dict_object['record'])['Url'] = dict_object['pageurl']
                    (dict_object['record'])['fileName'] = filepath.replace(
                        config.ConfigManager().PCFileFolder + '/', '')
                    dict_object_record_list.append(dict_object['record'])
                    dbrecordcount += 1

    except BaseException as ex:
        utility.log_exception_file(
            ex,
            dcrconfig.ConfigManager().SemanticGraphLogFile)
    if dict_object_record_list:
        insert_to_db(dict_object_record_list)
    # updating doc_id in config table

    return dbrecordcount
Example #28
def automate_processes():
    utility.write_to_file(config.ConfigManager().PromptcloudLogFile, 'a',
                          'PromptCloudautomationscript running')
    try:
        # download files into PCCompData within mnt/nlpdata (xml format)
        exec(
            open('rates_pc_download_crawldata_threading.py').read(), globals())
        # compress the PCCompdata folder
        exec(open('compress.py').read(), globals())
        # unzip files into the PCData folder; the timestamp is stored in dataloadconfig
        exec(open('pc_rates_unzip_gz.py').read(), globals())
        # load rates data into the database
        exec(open('pc_rates_dataload.py').read(), globals())
        # for automatically sending emails
        # exec(open('mailsend.py').read(), globals())
        # store analysis file in s3 backup
        # exec(open('pcdataanalysisbackup.py').read(), globals())
    except BaseException as ex:
        utility.log_exception_file(ex,
                                   config.ConfigManager().PromptcloudLogFile)
Example #29
def datamasking(row):
    maskingText = makingjsondata(row)
    maskingText = json.dumps(maskingText)
    headers = {"Content-Type": "application/json"}
    conn = http.client.HTTPConnection(config.ConfigManager().Host,
                                      config.ConfigManager().Port)
    conn.request(config.ConfigManager().JobServerMethod,
                 config.ConfigManager().API, maskingText, headers)
    response = conn.getresponse()
    data = response.read()
    result = json.loads(data.decode('utf8'))
    try:
        row['supplierName'] = result['supplierName']
        row['clientId'] = result['clientId']
        row['mspId'] = result['mspId']
        row['dataSource'] = result['dataSource']
        # row['source'] = result['source']
    except BaseException as ex:
        print(ex)
        # 'file' was undefined here; log to the configured log file instead
        utility.log_exception_file(ex, config.ConfigManager().LogFile)
    conn.close()
    return row
Example #30
    def __init__(self, parent, manager):
        """
        Creates a new tab widget instance having the given parent and
        utilising the given manager.

        Arguments:
            parent (QWidget): parent of this widget
            manager (manage.FileManager): file management interface
        """
        super().__init__(parent, manager)
        self.configure = config.ConfigManager()
        self.ignore_keywords = []
        self.init_layout()