Example #1
0
def generate_matrices_remote(self,
                             containerid: str = None,
                             feats: int = 10,
                             words: int = 6,
                             vectors_path: str = None,
                             docs_per_feat: int = 0,
                             feats_per_doc: int = 3):
    """Request matrix generation from the remote nlp service.

    This is used when nlp lives on its own machine. The container is first
    flagged as busy for the requested feature number, then one of two nlp
    tasks is dispatched: ``factorize_matrices`` when a file exists at
    ``vectors_path``, ``compute_matrices`` otherwise.

    :param containerid: primary key used to load the container.
    :param feats: feature number; cast to ``int`` before being sent.
    :param words: forwarded unchanged to the nlp task.
    :param vectors_path: path tested with ``os.path.isfile``. ``None`` or an
        empty string is treated as "no vectors file".
    :param docs_per_feat: cast to ``int`` before being sent.
    :param feats_per_doc: cast to ``int`` before being sent.
    :return: None
    """
    container = Container.get_object(pk=containerid)
    FeaturesStatus.set_status_feats(
        containerid=container.pk,
        busy=True,
        feats=feats,
    )
    kwds = {
        'containerid': containerid,
        'feats': int(feats),
        'words': words,
        'docs_per_feat': int(docs_per_feat),
        'feats_per_doc': int(feats_per_doc),
        'path': container.get_folder_path(),
    }
    # os.path.isfile(None) raises TypeError. Without this guard a missing
    # vectors_path would abort the task after the busy flag has been set,
    # and nothing would be dispatched.
    has_vectors = bool(vectors_path) and os.path.isfile(vectors_path)
    task_key = 'factorize_matrices' if has_vectors else 'compute_matrices'
    celery.send_task(NLP_TASKS[task_key], kwargs=kwds)
Example #2
0
def monitor_crawl(containerid: int = None, crawlid: str = None):
    """Crawl callback task: request crawl metrics and chain their handling.

    Dispatches the ``crawl_metrics`` task for the given container and links
    ``process_crawl_resp`` to it, pre-bound with ``containerid`` and
    ``crawlid``.

    :param containerid: id of the container being crawled.
    :param crawlid: id of the crawl being monitored.
    :return: None
    """
    metrics_task = RMXWEB_TASKS['crawl_metrics']
    metrics_kwargs = {'containerid': containerid}
    on_metrics = process_crawl_resp.s(containerid, crawlid)
    celery.send_task(metrics_task, kwargs=metrics_kwargs, link=on_metrics)
Example #3
0
def delete_data_from_container(containerid: str = None,
                               data_ids: List[int] = None):
    """Delete data objects from a container.

    After the deletion, an ``integrity_check`` task is dispatched, but only
    when the container reports that a matrix exists.

    :param containerid: id of the container owning the data.
    :param data_ids: ids of the data objects to delete.
    :return: None
    """
    container = Container.get_object(containerid)
    DataModel.delete_many(data_ids=data_ids, containerid=containerid)
    if not container.matrix_exists:
        return
    check_task = RMXWEB_TASKS['integrity_check']
    celery.send_task(check_task, kwargs={'containerid': containerid})
Example #4
0
def integrity_check(containerid: str = None):
    """Start an integrity check of the container on the nlp service.

    Flags the container as having an integrity check in progress, then
    dispatches the nlp ``integrity_check`` task with the container id and
    its folder path.

    :param containerid: primary key of the container to check.
    :return: None
    """
    container = Container.get_object(pk=containerid)
    container.set_integrity_check_in_progress()
    check_task = NLP_TASKS['integrity_check']
    payload = {
        'containerid': containerid,
        'path': container.get_folder_path(),
    }
    celery.send_task(check_task, kwargs=payload)
Example #5
0
def get_features(containerid: int = None,
                 container=None,
                 feats: int = None,
                 words: int = None,
                 **_):
    """Retrieve features from nlp and wrap them in a status dictionary.

    Blocks until the ``retrieve_features`` task returns.

    :param containerid: id of the container.
    :param container: container object; its folder path is sent to nlp.
    :param feats: feature number to retrieve.
    :param words: forwarded to the nlp task.
    :return: ``{'success': True, 'data': ...}`` when the task returns a
        truthy response, otherwise ``{'success': False, 'msg': ...}``.
    """
    task_name = NLP_TASKS['retrieve_features']
    request = {
        'containerid': containerid,
        'feats': feats,
        'path': container.get_folder_path(),
        'words': words
    }
    features = celery.send_task(task_name, kwargs=request).get()
    if not features:
        return {
            'success': False,
            'msg': f'no features for feature number {feats}'
        }
    return {'success': True, 'data': features}
Example #6
0
File: emit.py Project: dbrtk/rmxweb
def get_available_features(containerid: int = None, folder_path: str = None):
    """Ask nlp which features are available and wait for the answer.

    :param containerid: container id, sent to nlp under the ``corpusid`` key.
    :param folder_path: sent to nlp under the ``path`` key.
    :return: whatever the ``available_features`` task returns.
    """
    task_name = NLP_TASKS['available_features']
    payload = {'corpusid': containerid, 'path': folder_path}
    pending = celery.send_task(task_name, kwargs=payload)
    return pending.get()
Example #7
0
File: emit.py Project: dbrtk/rmxweb
def crawl_async(url_list: list = None, containerid=None, depth=1):
    """Launch a crawl in scrasync and schedule the task that monitors it.

    :param url_list: sent to the crawler under the ``endpoint`` key.
    :param containerid: id of the container the crawl belongs to.
    :param depth: forwarded to the crawler.
    :return: the crawl id returned by the ``launch_crawl`` task.
    """
    launch_kwargs = {
        'endpoint': url_list,
        'containerid': containerid,
        'depth': depth
    }
    pending_launch = celery.send_task(SCRASYNC_TASKS['launch_crawl'],
                                      kwargs=launch_kwargs)
    crawlid = pending_launch.get()

    # The monitor is delayed with a countdown so that it does not start
    # immediately, as prometheus may still be empty.
    monitor_kwargs = {'containerid': containerid, 'crawlid': crawlid}
    celery.send_task(RMXWEB_TASKS['monitor_crawl'],
                     kwargs=monitor_kwargs,
                     countdown=CRAWL_START_MONITOR_COUNTDOWN)
    return crawlid
Example #8
0
    def wrapped_view(containerid: int = None,
                     words: int = 10,
                     features: int = 10,
                     docsperfeat: int = 5,
                     featsperdoc: int = 3,
                     **kwds):
        """Call the wrapped function only when the features are available.

        Three outcomes, depending on the container's feature availability:

        * busy: return a "retry" payload without doing anything else;
        * available: call ``func`` with the container and the parameters;
        * otherwise: dispatch ``generate_matrices_remote`` and return the
          "retry" payload updated with the availability dictionary.
        """
        container = Container.get_object(pk=containerid)
        availability = container.features_availability(feature_number=features)
        retry_payload = {
            'busy': True,
            'retry': True,
            'success': False,
            'available': False,
            'features': features,
            'containerid': container.pk
        }
        if availability.get('busy'):
            return retry_payload

        if not availability.get('available'):
            # Nothing computed yet: ask for the matrices to be generated.
            task_name = config.RMXWEB_TASKS['generate_matrices_remote']
            task_kwargs = {
                'containerid': container.pk,
                'feats': features,
                'vectors_path': container.get_vectors_path(),
                'words': words,
                'docs_per_feat': docsperfeat,
                'feats_per_doc': featsperdoc
            }
            celery.send_task(task_name, kwargs=task_kwargs)
            retry_payload.update(availability)
            return retry_payload

        # Extra keyword arguments override the defaults built here.
        call_kwargs = {
            'words': words,
            'feats': features,
            'docs_per_feat': docsperfeat,
            'feats_per_doc': featsperdoc,
            'container': container,
            'containerid': container.pk,
            **kwds
        }
        return func(**call_kwargs)
Example #9
0
def process_crawl_resp(resp, containerid, crawlid):
    """Process the response from crawl_metrics.

    When the response reports the crawl as ready, the crawl status is
    deleted in scrasync, the container is marked as crawl-ready and, unless
    an integrity check is already in progress, ``integrity_check`` is
    dispatched. Otherwise ``monitor_crawl`` is scheduled again after
    ``CRAWL_MONITOR_COUNTDOWN``.

    :param resp: response of crawl_metrics; its ``ready`` key is read.
    :param containerid: id of the container being crawled.
    :param crawlid: id of the crawl being monitored.
    :return: None
    """
    crawl_status = Container.container_status(containerid)
    if resp.get('ready'):
        celery.send_task(SCRASYNC_TASKS['delete_crawl_status'],
                         kwargs={
                             'containerid': containerid,
                             'crawlid': crawlid
                         })
        container = Container.get_object(pk=containerid)
        container.set_crawl_ready(value=True)
        if not crawl_status['integrity_check_in_progress']:
            celery.send_task(RMXWEB_TASKS['integrity_check'],
                             kwargs={'containerid': containerid})
    else:
        # Forward crawlid as well as containerid. If it is dropped, every
        # later monitoring cycle runs with crawlid=None, and so does the
        # final delete_crawl_status call.
        celery.send_task(RMXWEB_TASKS['monitor_crawl'],
                         kwargs={
                             'containerid': containerid,
                             'crawlid': crawlid
                         },
                         countdown=CRAWL_MONITOR_COUNTDOWN)
Example #10
0
def search_texts(words: typing.List[str] = None,
                 highlight: bool = None,
                 path: str = None) -> dict:
    """Search a collection of texts for a list of words.

    Blocks until the ``search_text`` task returns.

    :param words: the words to search for.
    :param highlight: forwarded to the search task.
    :param path: sent to the search task under the ``container_path`` key.
    :return: the result of the ``search_text`` task.
    """
    task_name = RMXGREP_TASK['search_text']
    query = {
        'highlight': highlight,
        'words': words,
        'container_path': path,
    }
    pending = celery.send_task(task_name, kwargs=query)
    return pending.get()
Example #11
0
def hierarchical_tree(containerid=None,
                      flat: bool = None,
                      container=None,
                      **_) -> dict:
    """Request the hierarchical tree for a container from nlp.

    Waits for the task result with a 3 second timeout.

    :param containerid: id of the container.
    :param flat: forwarded to the nlp task.
    :param container: accepted but not used here.
    :return: the result of the ``hierarchical_tree`` task.
    """
    task_name = NLP_TASKS['hierarchical_tree']
    request = {'containerid': containerid, 'flat': flat}
    pending = celery.send_task(task_name, kwargs=request)
    return pending.get(timeout=3)
Example #12
0
File: emit.py Project: dbrtk/rmxweb
def get_features(feats: int = 10,
                 words: int = 6,
                 containerid: int = None,
                 path: str = None,
                 docs_per_feat: int = 0,
                 feats_per_doc: int = 3):
    """Get features and documents from nlp.

    Dispatches the ``features_and_docs`` task, which retrieves or generates
    the requested data, and blocks until its result is available.

    :param feats: feature number.
    :param words: forwarded to the nlp task.
    :param containerid: id of the container.
    :param path: forwarded to the nlp task.
    :param docs_per_feat: forwarded to the nlp task.
    :param feats_per_doc: forwarded to the nlp task.
    :return: the result of the ``features_and_docs`` task.
    """
    task_name = NLP_TASKS['features_and_docs']
    request = {
        'path': path,
        'feats': feats,
        'containerid': containerid,
        'words': words,
        'docs_per_feat': docs_per_feat,
        'feats_per_doc': feats_per_doc
    }
    pending = celery.send_task(task_name, kwargs=request)
    return pending.get()