Esempio n. 1
0
import en_vectors_web_lg, en_core_web_lg
import json
import os
from keras.utils.np_utils import to_categorical
import logging_util
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import itertools
from matplotlib import cm
from sklearn.preprocessing import LabelEncoder

# Ensure the logging directory exists before constructing the logger.
# exist_ok=True replaces the check-then-create pattern, which is racy if
# two processes start at the same time.
logdir = "./logging"
os.makedirs(logdir, exist_ok=True)
logger = logging_util.logger(__name__, logdir)


def extract_words(doc, att_words, att_sents, sents_count, max_sentence_length,
                  nlp):
    """Collect attention-highlighted phrases from the first `sents_count`
    sentences of a spaCy-style `doc`.

    NOTE(review): this function continues beyond the visible chunk; the
    documentation below covers only what is shown here.

    :param doc: parsed document exposing an iterable ``doc.sents``
        (presumably a spaCy ``Doc`` — confirm against callers)
    :param att_words: per-word attention weights (usage not visible here)
    :param att_sents: per-sentence attention scores; only the first
        ``sents_count`` entries are considered
    :param sents_count: number of leading sentences to process
    :param max_sentence_length: sentence-length cap (usage not visible here)
    :param nlp: NLP pipeline object (usage not visible here)
    """

    # A sentence is considered salient only if its attention score clears
    # the smaller of the mean and the median — the stricter of the two
    # central-tendency cutoffs.
    sent_mean = np.mean(att_sents[:sents_count])
    sent_median = np.median(att_sents[:sents_count])
    sent_threshold = min(sent_mean, sent_median)

    # Accumulator for extracted phrases (populated below the visible chunk).
    phrases = {}

    for index, sentence in enumerate(doc.sents):
        # Only the first `sents_count` sentences have attention scores.
        if index >= sents_count:
            break
Esempio n. 2
0
from keras.engine.topology import Layer
from keras import initializers
from keras import backend as K
from keras.engine import InputSpec

import os

import logging_util
import tensorflow as tf

logger = logging_util.logger(__name__)


def dot_product(x, kernel):
    """Backend-agnostic dot product between an input tensor and a weight
    vector, so the attention layers work under both Theano and TensorFlow.

    TensorFlow cannot dot a higher-rank tensor with a 1-D kernel directly,
    so the kernel is first expanded to a column matrix and the resulting
    trailing singleton axis is squeezed away. Theano handles the 1-D
    kernel natively.

    Args:
        x (): input tensor
        kernel (): weight vector

    Returns:
        The dot product of ``x`` with ``kernel``.
    """
    if K.backend() != 'tensorflow':
        return K.dot(x, kernel)
    expanded = K.expand_dims(kernel)
    return K.squeeze(K.dot(x, expanded), axis=-1)


class AttWeightLayer(Layer):
    """Custom Keras layer computing attention weights.

    NOTE(review): the class body continues beyond the visible chunk; only
    the start of ``__init__`` is shown here.
    """
    def __init__(self, **kwargs):
        # Normal-distribution initializer for the layer's weights.
        self.init_normal = initializers.get('normal')
Esempio n. 3
0
import os
import os.path as path
import warnings
from collections import Counter

# Silence warnings before the noisier third-party imports below.
warnings.filterwarnings("ignore")

import logging_util
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler
# Load data

# Ensure the logging directory exists before constructing the logger.
# exist_ok=True replaces the check-then-create pattern, which is racy and
# previously relied on `os` being bound — `import os.path as path` alone
# does not bind the name `os`.
logdir = "../logging"
os.makedirs(logdir, exist_ok=True)

# NOTE(review): hard-coded user-specific data location — consider making
# this configurable via an environment variable or CLI argument.
data_path = '/Users/sli/Projects/data'
search_path = data_path + '/hyperparameter_search/mental_health_forum_simple_clf'
dataset_path = data_path + '/mental_health_forum_data'
logger = logging_util.logger("Hyper_search_simple_model",
                             logging_folder=logdir)

# Prepare data and label


def data_prep(df, params, if_resample=False):
    """Convert dialog data from a dataframe into model-ready tensors.

    NOTE(review): the function body continues beyond the visible chunk;
    this docstring reflects the original author's stated contract.

    :param df: dataframe containing disorder name and dialog text
    :param params: parameters controlling the data processing
    :param if_resample: if True, resample to balance the class sizes
    :return: a pair of
        output: dict with keys ``data``, ``encoded_label``, ``binary_label``
        label_encode: the fitted ``LabelEncoder`` for inverse transforms
    """