Example 1
def paraphrase_sentences(
    text,
    depth=int(constants.fetch_constant("language_depth")),
    project_id=constants.fetch_constant("google_project_id")):
    parent = client.location_path(project_id, "global")
    x = client.get_supported_languages(parent)
    target_languages = [item.language_code for item in x.languages[:depth]]
    translated_text = []
    for language in target_languages:
        response = client.translate_text(
            parent=parent,
            contents=[text],
            mime_type='text/plain',  # mime types: text/plain, text/html
            source_language_code='en-IN',
            target_language_code=language)
        for translation in response.translations:
            translated_text.append(translation.translated_text)
    result = []
    for lg, sentence in zip(target_languages, translated_text):
        response = client.translate_text(
            parent=parent,
            contents=[sentence],
            mime_type='text/plain',  # mime types: text/plain, text/html
            source_language_code=str(lg),
            target_language_code="en")
        for translation in response.translations:
            result.append(translation.translated_text)
    return result
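The round trip above (English to pivot language and back to English) yields one paraphrase candidate per pivot. A minimal usage sketch, assuming the Translation client and constants are configured as in Example 11:

# Hedged usage sketch: `client` and `constants` must already be set up
# (see Example 11); depth=3 pivots through the first three supported languages.
candidates = paraphrase_sentences("Could you walk me through the pricing?",
                                  depth=3)
for candidate in candidates:
    print(candidate)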
Example 2
def match_page():
    """
    UI endpoint for matching a sentence against org_snippets
    """
    try:
        # Primes the cached signals for the default task.
        prod_id = signal_service.make_cached_signals_product(
            constants.fetch_constant("default_task_id"))
        return render_template("match.html")
    except (sken_exceptions.NoSignalFound, sken_exceptions.NoProductFound) as exe:
        resp = Response(exe.message, status=500, mimetype='text/plain')
        resp.headers['Access-Control-Allow-Origin'] = '*'
        return resp
Example 3
def make_snippets(df, snippet_ids, task_id):
    if len(df) != 0:
        sentences = df["text"].to_list()
        sentence_vectors = sken_singleton.Singletons.get_instance(
        ).perform_embeddings(sentences)
        vad_chunks = []
        for i in range(len(df)):
            vad_chunks.append(
                VadChunk(snippet_ids[i],
                         df["from_time"][i],
                         df["to_time"][i],
                         df["speaker"][i],
                         df["text"][i],
                         sentence_vectors[i],
                         None,
                         task_id,
                         df["orignal_ids"][i],
                         questions=None,
                         q_encoding=None,
                         encoding_method=constants.fetch_constant(
                             "encoding_method")))

        return vad_chunks
    else:
        return []
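A hypothetical input illustrating the DataFrame schema the function expects; VadChunk and the embedding singleton come from the surrounding project. Note the "orignal_ids" column, spelled as in the source data:

import pandas as pd

df = pd.DataFrame({
    "text": ["Hi, this is Sam from Acme.", "What budget did you have in mind?"],
    "from_time": [0.0, 5.2],
    "to_time": [4.8, 9.1],
    "speaker": ["Agent", "Agent"],
    "orignal_ids": [[101], [102]],  # column name as spelled in the source
})
chunks = make_snippets(df, snippet_ids=[1, 2], task_id=42)  # hypothetical ids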
Example 4
def make_product_signal(signal_tokens, scores, threshold, value, product_id):
    signal_token_lists = []
    logger.info("Making signal_df")
    for token, score in zip(signal_tokens, scores):
        signal_token_lists.append({
            'val': pd.Series(make_root_word(signal_tokens[token])),
            'score': int(score)
        })

    df = pd.DataFrame(signal_token_lists)
    pickle_string = pickle.dumps(df)
    sql = "insert into public.product_signal (name, color, value, product_id, created_at, updated_at, is_active, " \
          "type, engine, match_type, do_generate) values(%s, '#f09600', %s, %s, now(), now(), true, '', " \
          "'RAZOR'" \
          ", 'BOTH', false) returning id; "

    rows, col_names = db.DBUtils.get_instance().execute_query(
        sql, (constants.fetch_constant("signal_name"), value, product_id),
        is_write=True,
        is_return=True)
    sql = "INSERT INTO public.signal_generated (signal_id, text_, created_at, snippets_id, is_active) VALUES(%s, %s, " \
          "now(), NULL, false); "
    db.DBUtils.get_instance().execute_query(
        sql, (rows[0][col_names.index("id")], value),
        is_write=True,
        is_return=False)
    sql = "INSERT INTO public.product_signal_file (product_signal_id, signal_file, threshold) VALUES(%s, %s, %s); "
    db.DBUtils.get_instance().execute_query(
        sql, (rows[0][col_names.index("id")], pickel_string, threshold),
        is_write=True,
        is_return=False)
    logger.info("Made signal entry in db")
Example 5
def get_synonyms(sentence):
    """
    This method breaks the sentence into tokens and gets the pos tags for them if the pos tag is not in the list of
    restricted token list it gets the synonyms for each token using any of the three methods
    @param sentence:
    @return:
    """
    global not_accepted_pos
    if len(sentence.split()) > 0:
        tokens = get_tokens(sentence)
        logger.info("Made {} tokens for {}".format(len(tokens), sentence))
        pos_tags = nltk.pos_tag(tokens)
        result = []
        for tag in pos_tags:
            if tag[1] not in not_accepted_pos:
                result.append(
                    get_synonyms_thesaurus(
                        tag[0], int(constants.fetch_constant("max_synonims"))))
            else:
                result.append({tag[0]: []})

        max_length = max([len(list(item.values())[0]) for item in result])
        return {"data": result, "max_len": max_length}
    else:
        raise sken_exceptions.NoTokensFound
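A minimal call, assuming NLTK's tagger data and the thesaurus helper are available ("max_synonims" is spelled as in the constants table):

synonyms = get_synonyms("we can schedule a quick demo")
print(synonyms["max_len"])   # length of the longest synonym list
print(synonyms["data"][0])   # {token: [synonym, ...]} for the first token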
Example 6
    def __init__(self):
        if Singletons.__instance is not None:
            raise Exception("This class is a singleton!")
        else:
            logger.info(" Loading Summarization model")
            self.model = AutoModelWithLMHead.from_pretrained(
                constants.fetch_constant("model_path"))
            logger.info(" Loading Tokenizer for the model")
            self.tokenizer = AutoTokenizer.from_pretrained(
                constants.fetch_constant("model_path"))
            logger.info("Checking CUDA availability")
            if torch.cuda.is_available():
                logger.info("Transferring model to gpu")
                self.device = "cuda"
                self.model.to(self.device)
            else:
                logger.info("CUDA not found using CPU")
                self.device = "cpu"

        Singletons.__instance = self
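The matching accessor is not part of this excerpt; a typical companion for this singleton pattern looks like the sketch below (an assumption, not the project's verbatim code):

    @staticmethod
    def get_instance():
        # Lazily constructs the singleton on first access.
        if Singletons.__instance is None:
            Singletons()
        return Singletons.__instance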
Example 7
def sentence_matching():
    sentence = request.form.get("sentence")
    vad_chunk = VadChunk(1, time.time(), time.time() + 1, "Agent", sentence, None, None,
                         None)
    result = scoring_service.vad_chunk_match(vad_chunk, constants.fetch_constant("default_prod_id"))
    output = []
    for item in result:
        output.append({"input_sentence": item.snippet_text, "signal": item.signal_text, "tokens": item.matched_tokens,
                       "score": str(item.score), "threshold": item.threshold, "id": item.signal_id,
                       "html_": ""})

    resp = Response(jsonpickle.encode(output),
                    mimetype='application/json')
    resp.headers['Access-Control-Allow-Origin'] = '*'
    return resp
Example 8
def create_lq_matches(vad_chunks, threshold):
    """
    Returns the lead-qualification facets caught for each snippet; only one
    facet signal can be caught across all the facets.
    """
    caught_lq_facets = []
    logger.info("Making caught facets for lead_qualification")
    lq_dims = sken_singleton.Singletons.get_instance().get_cached_lq_dims()
    for vad_chunk in vad_chunks:
        if vad_chunk.q_encoding is None:
            continue
        for i, question in enumerate(vad_chunk.questions):
            scores = np.zeros(
                shape=(len(lq_dims),
                       max(len(x.facet_signals) for x in lq_dims.values())))
            for x, facet in enumerate(lq_dims):
                for y, facet_signal in enumerate(
                        lq_dims[facet].facet_signals):
                    # Cosine similarity between the question encoding and
                    # the cached facet-signal embedding.
                    score = (np.dot([vad_chunk.q_encoding[i]],
                                    np.array(facet_signal.embedding).T) /
                             (np.linalg.norm([vad_chunk.q_encoding[i]]) *
                              np.linalg.norm(facet_signal.embedding)))[0][0]
                    scores[x, y] = score
            if np.amax(scores) >= float(threshold):
                facet_index = np.where(scores == np.amax(scores))[0][0]
                facet_signal_index = np.where(scores == np.amax(scores))[1][0]
                facet = lq_dims[list(lq_dims.keys())[facet_index]]
                facet_signal = facet.facet_signals[facet_signal_index]
                caught_lq_facets.append(
                    CaughtFacetSignals(
                        vad_chunk, vad_chunk.text, question, facet.name,
                        facet_signal, facet_signal.text, np.amax(scores),
                        constants.fetch_constant("encoding_method"),
                        "Lead-Qualification"))
    return caught_lq_facets
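The score computed above is the cosine similarity between the question encoding and each cached facet-signal embedding; standalone, the same quantity is:

import numpy as np

def cosine_similarity(a, b):
    # cos(a, b) = a.b / (|a| * |b|); 1.0 means identical direction.
    a, b = np.asarray(a, dtype=float), np.asarray(b, dtype=float)
    return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))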
Example 9
def make_snippet_question_embeddings(vad_chunk):
    """
    Sets the sentence embedding of snippet questions if present else sets it to None
    :param vad_chunk:
    :return: None
    """
    if vad_chunk.questions is not None:
        vad_chunk.set_question_encoding(
            sken_singleton.Singletons.get_instance().perform_embeddings(
                vad_chunk.questions),
            constants.fetch_constant("encoding_method"))
        logger.info(
            "Calculated embeddings for {} snippet questions for snippet_id={}"
            .format(len(vad_chunk.questions), vad_chunk.sid))
    else:
        logger.info(
            "There were no snippet questions for snippet_id={}".format(
                vad_chunk.sid))
        vad_chunk.set_question_encoding(None, None)
Example 10
    def __init__(self):
        if DBUtils.__instance is not None:
            raise Exception("This is a singleton class ")
        else:
            logger.info(
                "Initializing connection pool for database connection with {}; "
                "this should happen only once during startup".format(
                    constants.fetch_constant("host")))
            self.sales_pool = pool.ThreadedConnectionPool(constants.fetch_constant("min_pool"),
                                                          constants.fetch_constant("max_pool"),
                                                          host=constants.fetch_constant("host"),
                                                          user="******",
                                                          password=constants.fetch_constant("password"),
                                                          port="5432",
                                                          database=constants.fetch_constant("db_name"))

            logger.info("Made {} max_connections ".format(self.sales_pool.maxconn))
            DBUtils.__instance = self
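Connections are then borrowed from and returned to the pool; a sketch of the usual psycopg2 pattern (the project's execute_query wrapper is not shown in this excerpt):

conn = DBUtils.get_instance().sales_pool.getconn()
try:
    with conn.cursor() as cur:
        cur.execute("select 1")
        print(cur.fetchone())
finally:
    DBUtils.get_instance().sales_pool.putconn(conn)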
Example 11
import time

from google.cloud import translate
from concurrent.futures import ThreadPoolExecutor
import multiprocessing
from src.utilities import constants, sken_logger, db
import spacy
import textacy

logger = sken_logger.get_logger("sentence_services")

nlp = spacy.load("en_core_web_sm")

client = translate.TranslationServiceClient()
parent = client.location_path(constants.fetch_constant("translate_project_id"),
                              "global")
target_languages = [
    item.language_code for item in client.get_supported_languages(
        parent).languages[:int(constants.fetch_constant("translation_depth"))]
]


def paraphrase_sentence(text):
    global parent, target_languages

    def get_the_other(language):
        response = client.translate_text(
            parent=parent,
            contents=[text],
            mime_type='text/plain',  # mime types: text/plain, text/html
            source_language_code='en-IN',
Example 12
def upload_csv():
    global tmp_pro_id, request_count

    if request.method == "POST":
        if 'file' not in request.files:
            flash('No file part')
            return redirect(request.url)
    file = request.files['file']
    threshold = request.form.get("threshold")
    org_id = request.form.get("organization")
    product_id = request.form.get("product_id")
    if org_id:
        if file.filename == '':
            flash('No selected file')
            return redirect(request.url)
    input_filename = secure_filename(file.filename)
    input_file_path = os.path.join(
        app.config['UPLOAD_FOLDER'], input_filename)
    if os.path.exists(input_file_path):
        logger.info("File path {} already exists so removing this file".format(
            input_file_path))
        os.remove(input_file_path)
    logger.info("Making new file {}".format(input_file_path))
    file.save(input_file_path)

    if request_count == 0:
        logger.info(
            "This is the first request for organization={} and product={}".
            format(org_id, product_id))
        tmp_pro_id = product_id
        request_count += 1
        resp = Response(jsonpickle.encode(
            dimension_engine.wrapper_method(input_file_path, org_id,
                                            product_id, threshold)),
                        mimetype='application/json')
        resp.headers['Access-Control-Allow-Origin'] = '*'

    elif request_count != 0 and tmp_pro_id != product_id:
        logger.info(
            "First request for organization={} and product={}; clearing the cached facets for old_product={}"
            .format(org_id, product_id, tmp_pro_id))
        dimension_engine.refresh_cached_dims(org_id, product_id)
        request_count = 1
        tmp_pro_id = product_id
        resp = Response(jsonpickle.encode(
            dimension_engine.wrapper_method(input_file_path, org_id,
                                            threshold)),
                        mimetype='application/json')
        resp.headers['Access-Control-Allow-Origin'] = '*'

    else:
        request_count += 1
        logger.info(
            "This is request {} for organization={} and product={}".format(
                request_count, org_id, tmp_pro_id))
        resp = Response(jsonpickle.encode(
            dimension_engine.wrapper_method(input_file_path, org_id,
                                            threshold)),
                        mimetype='application/json')
        resp.headers['Access-Control-Allow-Origin'] = '*'
    return resp
Example 13
def make_cached_dimensions(org_id, prod_id):
    """
    This method caches the facet signals for the particular product and organization
    :param org_id:
    :param prod_id:
    :return:
    """
    if len(sken_singleton.Singletons.get_instance(
    ).get_cached_lq_dims()) == 0 or len(sken_singleton.Singletons.get_instance(
    ).get_cached_intro_dims()) == 0:
        logger.info(
            "Creating cached_dimensions for organization={} and product={}".
            format(org_id, prod_id))
        sql = "select dimension.id as dimid,dimension.name_ as dimname,facet.id as facet_id,facet.name_ as " \
              "facet_name,facet_signal.id as fsid,facet_signal.value as fsval,generated_facet_signals.id as gsid," \
              "generated_facet_signals.value as gs_value,facet_signal.org_id,facet_signal.product_id from dimension " \
              "left join facet on facet.dim_id = dimension.id left join facet_signal on facet_signal.facet_id = " \
              "facet.id left join generated_facet_signals on 	generated_facet_signals.facet_signal_id = " \
              "facet_signal.id where facet_signal.org_id=%s and facet_signal.product_id=%s group by dimension.id," \
              "facet.id,facet_signal.id,generated_facet_signals.id "

        rows, col_names = DBUtils.get_instance().execute_query(
            sql, (org_id, prod_id), is_write=False, is_return=True)
        kvp_id = as_id = a_id = b_id = i_id = n_id = None
        if len(rows) != 0:
            start = time.time()
            logger.info(
                "Making cached facet signals for organization={} and product={}"
                .format(org_id, prod_id))

            def make_facet_signal(row):
                # Builds a FacetSignal from a result row; rows without a
                # generated value get no embedding.
                if row[col_names.index("gs_value")] is not None:
                    return FacetSignal(
                        row[col_names.index("gsid")],
                        row[col_names.index("gs_value")],
                        row[col_names.index("fsid")],
                        embedding=sken_singleton.Singletons.get_instance(
                        ).perform_embeddings(row[col_names.index("gs_value")]),
                        embedding_method=constants.fetch_constant(
                            "encoding_method"))
                return FacetSignal(row[col_names.index("gsid")],
                                   row[col_names.index("gs_value")],
                                   row[col_names.index("fsid")],
                                   embedding=None,
                                   embedding_method=None)

            kvp_facet_signals = []
            as_facet_signals = []
            authority_facet_signals = []
            budget_facet_signals = []
            interest_facet_signals = []
            need_facet_signals = []
            for row in rows:
                facet_name = str(row[col_names.index("facet_name")]).lower()
                if str(row[col_names.index(
                        "dimname")]).lower() == "introduction":
                    if facet_name == "key value proposition":
                        kvp_id = row[col_names.index("fsid")]
                        kvp_facet_signals.append(make_facet_signal(row))
                    else:
                        as_id = row[col_names.index("fsid")]
                        as_facet_signals.append(make_facet_signal(row))
                else:
                    if facet_name == "authority":
                        a_id = row[col_names.index("fsid")]
                        authority_facet_signals.append(make_facet_signal(row))
                    elif facet_name == "budget":
                        b_id = row[col_names.index("fsid")]
                        budget_facet_signals.append(make_facet_signal(row))
                    elif facet_name == "interest":
                        i_id = row[col_names.index("fsid")]
                        interest_facet_signals.append(make_facet_signal(row))
                    else:
                        n_id = row[col_names.index("fsid")]
                        need_facet_signals.append(make_facet_signal(row))

            with ThreadPoolExecutor(max_workers=6) as executor:
                executor.submit(
                    sken_singleton.Singletons.get_instance().
                    set_cached_intro_dims, "key_value_proposition",
                    Facet(kvp_id, "key value proposition", kvp_facet_signals))
                executor.submit(
                    sken_singleton.Singletons.get_instance().
                    set_cached_intro_dims, "aspiration_setting",
                    Facet(as_id, "aspiration setting", as_facet_signals))
                executor.submit(
                    sken_singleton.Singletons.get_instance().
                    set_cached_lq_dims, "authority",
                    Facet(a_id, "authority", authority_facet_signals))
                executor.submit(
                    sken_singleton.Singletons.get_instance().
                    set_cached_lq_dims, "budget",
                    Facet(b_id, "budget", budget_facet_signals))
                executor.submit(
                    sken_singleton.Singletons.get_instance().
                    set_cached_lq_dims, "interest",
                    Facet(i_id, "interest", interest_facet_signals))
                executor.submit(
                    sken_singleton.Singletons.get_instance().
                    set_cached_lq_dims, "need_investigation",
                    Facet(n_id, "need investigation", need_facet_signals))

            logger.info(
                "Cached {} facet signals for org={} and product={} in {}".
                format(
                    len(kvp_facet_signals + as_facet_signals +
                        authority_facet_signals + interest_facet_signals +
                        budget_facet_signals + need_facet_signals), org_id,
                    prod_id, (time.time() - start)))

        else:
            logger.info(
                "No facet_signals found for organization={} and product={}".
                format(org_id, prod_id))
            raise sken_exceptions.NoFacetFound(org_id, prod_id)
    else:
        logger.info(
            "Skipping caching of facet_signals for organization={} and product_id={}; they already exist in RAM"
            .format(org_id, prod_id))
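One caveat with the caching step above: executor.submit discards worker exceptions unless the returned Future is read. A hedged variant that surfaces errors (`cache_jobs` is a hypothetical list of the six (setter, key, Facet) triples, not a name from the source):

from concurrent.futures import ThreadPoolExecutor

with ThreadPoolExecutor(max_workers=6) as executor:
    futures = [executor.submit(setter, key, facet)
               for setter, key, facet in cache_jobs]  # hypothetical job list
    for future in futures:
        future.result()  # re-raises any exception from the worker thread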
Example 14
from flask import Flask, request, Response, render_template, flash, redirect, send_file
from werkzeug.utils import secure_filename

from src.utilities import sken_logger, db, sken_singleton, constants
from src.services import dimension_engine
from src.services import facet_service

logger = sken_logger.get_logger("main")

sken_singleton.Singletons.get_instance()
db.DBUtils.get_instance()
tmp_pro_id = None  # tracks the cached product so the cache can be reset when a new product request arrives
request_count = 0

app = Flask(__name__)
app.config['UPLOAD_FOLDER'] = constants.fetch_constant("upload_folder")


@app.route('/')
def index():
    return render_template('index.html')


@app.route("/upload_file", methods=["POST", "GET"])
def upload_csv():
    global tmp_pro_id, request_count

    if request.method == "POST":
        if 'file' not in request.files:
            flash('No file part')
            return redirect(request.url)