Beispiel #1
0
def lbd_only_terms_from_both_domains(input_dict):
    """Shrink the model vocabulary to the terms counted in both domains.

    A term is kept when both ``_count_term_A()`` and ``_count_term_C()`` of
    ``HeuristicCalculations`` report a count greater than zero for it
    (presumably the per-domain occurrence counts -- confirm against
    ``HeuristicCalculations``).

    Args:
        input_dict: Mapping with the keys ``'adc'`` (corpus object whose
            ``documents`` attribute is read) and ``'bow_model_constructor'``
            (model constructor; it is modified in place through
            ``set_new_vocabulary``).

    Returns:
        dict: ``'bow_model_constructor'`` maps to the same constructor object
        after its vocabulary was replaced, ``'bow_dataset'`` maps to the
        result of ``BowDataset.from_adc(adc, bmc)``.

    Raises:
        KeyError: If ``'adc'`` or ``'bow_model_constructor'`` is missing.
    """
    adc = input_dict['adc']
    bmc = input_dict['bow_model_constructor']

    raw_documents = bmc.get_raw_text(adc.documents,
                                     join_annotations_with='|##|')
    classes = bmc.get_document_labels(adc, binary=True)

    # Only the per-domain term counts of the calculator are needed here.
    hc = HeuristicCalculations(raw_documents, classes, bmc)
    counted_in_a = hc._count_term_A() > 0
    counted_in_c = hc._count_term_C() > 0
    # The masks are already boolean, so they are combined directly instead
    # of being compared against True.
    terms_in_both_domains_idx = np.where(counted_in_a & counted_in_c)[0]

    orig_vocabulary = bmc.get_feature_names()
    terms_in_both_domains = [
        orig_vocabulary[i] for i in terms_in_both_domains_idx
    ]

    # Replace the vocabulary before building the dataset so the dataset is
    # created from the reduced model.
    bmc.set_new_vocabulary(terms_in_both_domains, raw_documents)
    bow_dataset = BowDataset.from_adc(adc, bmc)

    return {'bow_model_constructor': bmc, 'bow_dataset': bow_dataset}
Beispiel #2
0
def lbd_calculate_heuristics(input_dict):
    """Run the requested heuristic calculations over the corpus.

    Args:
        input_dict: Mapping with the required keys ``'adc'`` (corpus object
            whose ``documents`` attribute is read) and
            ``'bow_model_constructor'``, plus the optional key
            ``'heuristics'`` (names passed to ``calculate_heuristics``;
            defaults to an empty list).

    Returns:
        dict: ``'calcs'`` maps to whatever
        ``HeuristicCalculations.calculate_heuristics`` returns.
    """
    requested = input_dict.get('heuristics', [])
    corpus = input_dict['adc']
    constructor = input_dict['bow_model_constructor']

    # Arguments are evaluated left to right: raw text first, labels second.
    calculator = HeuristicCalculations(
        constructor.get_raw_text(corpus.documents,
                                 join_annotations_with='|##|'),
        constructor.get_document_labels(corpus, binary=True),
        constructor,
    )
    return {'calcs': calculator.calculate_heuristics(requested)}
Beispiel #3
0
def lbd_calculate_heuristics(input_dict):
    """Calculate the heuristics named in ``input_dict['heuristics']``.

    Args:
        input_dict: Mapping holding ``'adc'`` (corpus object; its
            ``documents`` attribute is read), ``'bow_model_constructor'``
            and, optionally, ``'heuristics'`` (list of heuristic names; an
            empty list is used when the key is absent).

    Returns:
        dict: A single entry ``'calcs'`` with the value returned by
        ``HeuristicCalculations.calculate_heuristics``.
    """
    names = input_dict.get('heuristics', [])
    documents_source = input_dict['adc']
    model = input_dict['bow_model_constructor']

    texts = model.get_raw_text(
        documents_source.documents,
        join_annotations_with='|##|',
    )
    labels = model.get_document_labels(documents_source, binary=True)

    heuristics = HeuristicCalculations(texts, labels, model)
    result = heuristics.calculate_heuristics(names)
    return {'calcs': result}
Beispiel #4
0
def lbd_only_terms_from_both_domains(input_dict):
    """Keep only the vocabulary terms that have counts in both domains.

    A term survives when ``HeuristicCalculations._count_term_A()`` and
    ``HeuristicCalculations._count_term_C()`` are both greater than zero at
    its index (presumably one count per domain -- confirm against
    ``HeuristicCalculations``).

    Args:
        input_dict: Mapping with the keys ``'adc'`` (corpus object whose
            ``documents`` attribute is read) and ``'bow_model_constructor'``
            (model constructor, updated in place via
            ``set_new_vocabulary``).

    Returns:
        dict: ``'bow_model_constructor'`` is the same constructor object
        with its vocabulary replaced; ``'bow_dataset'`` is the result of
        ``BowDataset.from_adc(adc, bmc)``.

    Raises:
        KeyError: If ``'adc'`` or ``'bow_model_constructor'`` is missing.
    """
    adc = input_dict['adc']
    bmc = input_dict['bow_model_constructor']

    raw_documents = bmc.get_raw_text(adc.documents, join_annotations_with='|##|')
    classes = bmc.get_document_labels(adc, binary=True)

    # The calculator is used solely for its per-domain term counts.
    hc = HeuristicCalculations(raw_documents, classes, bmc)
    present_in_a = hc._count_term_A() > 0
    present_in_c = hc._count_term_C() > 0
    # Boolean masks are and-ed directly; comparing the result to True would
    # be a no-op.
    terms_in_both_domains_idx = np.where(present_in_a & present_in_c)[0]

    orig_vocabulary = bmc.get_feature_names()
    terms_in_both_domains = [orig_vocabulary[i] for i in terms_in_both_domains_idx]

    # The vocabulary is swapped first so the dataset below is built from the
    # reduced model.
    bmc.set_new_vocabulary(terms_in_both_domains, raw_documents)
    bow_dataset = BowDataset.from_adc(adc, bmc)

    return {'bow_model_constructor': bmc, 'bow_dataset': bow_dataset}