Example no. 1
0
def get_projects_via_queue():
    """Long-running worker: consume project ids from the Azure 'projects-queue',
    expand each project's key phrases into search queries, and fan those out
    to the initial-crawling queue.

    Per successfully processed message:
      * sets 'query_count' on the project document,
      * enqueues one crawling message per generated query
        (format: '<query> ++<project id> --project'),
      * deletes the queue message and pushes the project id onto the
        project-completion queue.

    Runs forever; intended to be launched in its own process.
    """
    print("Projects queue is live")
    mycol = refer_projects_col()
    connect_str = os.getenv('AZURE_STORAGE_CONNECTION_STRING')
    projects_client = QueueClient.from_connection_string(connect_str, "projects-queue")

    while True:
        time.sleep(10)  # throttle queue polling
        for msg in projects_client.receive_messages():
            try:
                # Message body is a str(list) such as "['<hex object id>']".
                row = ast.literal_eval(msg.content)
                print(row[0], ' processing queries from the key phrases')
                entry_id = ObjectId(row[0])

                # Fetch the single project document. Guard against a stale
                # message referencing a deleted/missing project — the old
                # data[0] lookup raised IndexError and killed the worker.
                data = mycol.find_one({"_id": entry_id})
                if data is None:
                    print('No project found for', entry_id, '- skipping message')
                    continue

                queries = process_queries(data['key_phrases'])
                mycol.update_one({'_id': entry_id},
                                 {'$set': {'query_count': len(queries)}})
                for each_query in queries:
                    print(each_query, " adding to pipeline execution")
                    add_to_initial_crawling_queue(
                        [each_query + ' ++' + str(entry_id) + ' --project'])
                # Delete only after the fan-out succeeded, then signal
                # the completion-tracking worker.
                projects_client.delete_message(msg)
                add_to_project_completion_queue([entry_id])
            except (ValueError, SyntaxError, KeyError) as e:
                # Malformed message body or project document missing
                # 'key_phrases'; leave the message on the queue for
                # inspection but keep the worker alive.
                print('Skipping malformed project message:', e)
Example no. 2
0
def create_and_queue_project(project_name, key_phrases):
    """Persist a new project document in state 'queued' and push its id
    onto the projects queue for background processing."""
    collection = refer_projects_col()
    new_project = {
        'project_name': project_name,
        'key_phrases': key_phrases,
        'created_time': datetime.now(),
        'state': 'queued',
    }
    inserted = collection.insert_one(new_project)
    print("Project stored in db: ", inserted.inserted_id)
    print("Adding to projects queue")
    add_to_projects_queue([inserted.inserted_id])

# create_and_queue_project('Risk Management project',['Hazard management companies'])
# get_projects_via_queue()

# print(process_queries(['medical management system']))
Example no. 3
0
def create_and_queue_project(project_name, key_phrases):
    """Create a project record (state 'queued') and enqueue its id so a
    background worker can pick it up."""
    projects = refer_projects_col()
    doc = dict(
        project_name=project_name,
        key_phrases=key_phrases,
        created_time=datetime.now(),
        state='queued',
    )
    result = projects.insert_one(doc)
    print("Project stored in db: ", result.inserted_id)
    print("Adding to projects queue")
    add_to_projects_queue([result.inserted_id])


# create_and_queue_project('Educational softwares project',['school management', 'educational software'])
Example no. 4
0
def project_state_update_via_queue():
    """Long-running worker: poll the 'project-completion-queue' and mark a
    project 'Completed' once every one of its associated queries has
    reached state 'Completed'.

    Messages whose project is not yet finished are deliberately NOT
    deleted, so they are redelivered and re-checked on a later poll.
    Runs forever; intended to be launched in its own process.
    """
    print("Project state updating queue is live")
    proj_collection = refer_projects_col()
    query_collection = refer_query_col()

    connect_str = os.getenv('AZURE_STORAGE_CONNECTION_STRING')
    project_comp_client = QueueClient.from_connection_string(
        connect_str, "project-completion-queue")

    while (True):
        # print('*')
        time.sleep(600)  # completion changes slowly; poll every 10 minutes
        rows = project_comp_client.receive_messages()
        for msg in rows:
            time.sleep(10)
            # Message body is a str(list) such as "['<hex object id>']".
            row = msg.content
            row = ast.literal_eval(row)
            print(row[0])
            entry_id = ObjectId(row[0])
            project_data_entry = proj_collection.find({"_id": entry_id})
            data = [i for i in project_data_entry]
            #check_for_the_completion_of_components
            try:
                # KeyError here (caught below) means the project has not
                # had its queries attached yet.
                associated_queries = data[0]['associated_queries']
                completed_count = 0
                for each_query_res in associated_queries:
                    # NOTE(review): each_query_res is used as the _id as
                    # stored — presumably already an ObjectId; confirm.
                    que_entry = query_collection.find({"_id": each_query_res})
                    data_res = [i for i in que_entry]
                    if (data_res[0]['state'] == 'Completed'):
                        completed_count += 1
                print(['comp', completed_count, data[0]['query_count']])
                if (completed_count == data[0]['query_count']):
                    print("All the queries are completed for the project",
                          completed_count)
                    proj_collection.update_one(
                        {'_id': entry_id}, {'$set': {
                            'state': 'Completed'
                        }})
                    # Only delete the message once the project is marked
                    # done; otherwise it stays queued for the next pass.
                    project_comp_client.delete_message(msg)

            except KeyError as e:
                # Project exists but 'associated_queries'/'query_count'
                # are not populated yet.
                print('Project is not yet ready', e)
            except IndexError as e:
                # No matching project (or query) document found.
                print('Yet project entry not available')
            except Exception as e:
                print("Exception Occured during dumping ", e)
def get_entries_project(project_id):
    """Collect the ObjectIds of all crawled entries belonging to *project_id*.

    Walks every query listed in the project's 'associated_queries' and
    gathers the ids from each query's 'associated_entries'.

    Fixes two defects in the previous version: it returned inside the loop
    (so only the FIRST query's entries were ever reported), and it fell
    through to an implicit None when queries existed but none had entries.

    Args:
        project_id: the project document's _id (an ObjectId).

    Returns:
        list[ObjectId]: entry ids across all associated queries; empty
        list when the project has no queries or no entries yet.
    """
    projects_col = refer_projects_col()
    query_collection = refer_query_col()
    proj_data = list(projects_col.find({"_id": project_id}))
    if not proj_data or 'associated_queries' not in proj_data[0]:
        print("This project do not have any queries yet")
        return []
    entry_ids = []
    for each_query in proj_data[0]['associated_queries']:
        query_data = list(query_collection.find({"_id": ObjectId(each_query)}))
        if query_data and 'associated_entries' in query_data[0]:
            entry_ids.extend(
                ObjectId(i) for i in query_data[0]['associated_entries'])
    return entry_ids
def get_entries_project(project_id):
    """Audit the dump state of every crawled entry under *project_id*.

    NOTE(review): this redefinition shadows the earlier get_entries_project
    in the same module; only this version is reachable at runtime.

    Iterates all queries associated with the project, then every entry of
    each query, bucketing entry ids by their 'simplified_dump_state'.
    Prints progress/diagnostics along the way.

    Returns:
        dict with 'incompletes' (entries marked 'Incomplete') and
        'problems' (entries with a missing/unexpected state), both
        de-duplicated; or [] when the project has no queries yet.
    """
    completed_count = []  # NOTE: despite the name, holds entry ids, not a count
    incomplete_count = 0
    incompletes = []
    problems = []
    all_entires = []  # (sic) every entry id seen, across all queries
    profile_col = refer_collection()
    projects_col = refer_projects_col()
    query_collection = refer_query_col()
    proj_data_entry = projects_col.find({"_id": project_id})
    print('proj', proj_data_entry)
    proj_data = [i for i in proj_data_entry]
    print('data', len(proj_data))
    # NOTE(review): proj_data[-1] raises IndexError when no project
    # matches project_id — callers appear to guarantee existence; confirm.
    proj_attribute_keys = list(proj_data[-1].keys())
    if ('associated_queries' in proj_attribute_keys):
        associated_queries = proj_data[-1]['associated_queries']
        for each_query in associated_queries:
            query_data_entry = query_collection.find(
                {"_id": ObjectId(each_query)})
            query_data = [i for i in query_data_entry]
            print([
                query_data[0]['search_query'], query_data[0]['state'],
                query_data[0]['_id']
            ])
            query_attribute_keys = list(query_data[0].keys())
            if ('associated_entries' in query_attribute_keys):
                associated_entries = query_data[0]['associated_entries']
                # print('kk',associated_entries)
                obs_ids = [ObjectId(i) for i in associated_entries]
                all_entires.extend(obs_ids)

                # Classify each entry by its simplified dump state.
                for k in obs_ids:
                    prof_data_entry = profile_col.find({"_id": k})
                    # print('proj', proj_data_entry)
                    prof_data = [i for i in prof_data_entry]
                    prof_attribute_keys = list(prof_data[0].keys())

                    if ('simplified_dump_state' in prof_attribute_keys):
                        if (prof_data[0]['simplified_dump_state'] ==
                                'Completed'):
                            completed_count.append(k)
                        # else:print(prof_data[0]['simplified_dump_state'])
                        elif (prof_data[0]['simplified_dump_state'] ==
                              'Incomplete'):
                            incomplete_count += 1
                            incompletes.append(k)
                        else:
                            problems.append(k)
                    else:
                        # No dump-state field at all counts as a problem.
                        problems.append(k)
                #
                # print(['completed',completed_count,'all',len(obs_ids),'incompleted',incomplete_count,incompletes,'prob',problems])
                # # filt = []
                # # for k in obs_ids:
                # #     if(k not in problems):
                # #         filt.append(k)
                # # print('filt',filt)
                # if(completed_count==len(obs_ids)):
                #     query_collection.update_one({'_id': ObjectId(each_query)}, {'$set': {'state': 'Completed'}})

                # return obs_ids

        print('completed_count', len(list(set(completed_count))))
        print('incomplete_count', incomplete_count)
        print('incompletes', list(set(incompletes)))
        print('problems', list(set(problems)))
        print('all', all_entires)
        return {
            'incompletes': list(set(incompletes)),
            'problems': list(set(problems))
        }
        # all_entires = list(set(all_entires))m
        # return all_entires
    else:
        print("This project do not have any queries yet")
        return []
Example no. 7
0
    p12 = Process(target=get_cb_data_via_queue)
    p12.start()
    p13 = Process(target=simplified_export_via_queue)
    p13.start()
    p14 = Process(target=get_projects_via_queue)
    p14.start()
    p15 = Process(target=project_state_update_via_queue)
    p15.start()
    p16 = Process(target=query_state_update_via_queue)
    p16.start()

    connect_str = os.getenv('AZURE_STORAGE_CONNECTION_STRING')
    ic_client = QueueClient.from_connection_string(connect_str,
                                                   "initial-crawling-queue")
    mycol = refer_collection()
    projects_col = refer_projects_col()
    while (True):
        rows = ic_client.receive_messages()
        for msg in rows:
            # time.sleep(120)
            row = msg.content
            row = ast.literal_eval(row)
            print(row[0])

            input_d = row[0].split("--")
            try:
                mode = input_d[1]
                s_text = input_d[0]
                if mode == 'query':
                    query = s_text.strip()
                    print("Searching a query")