Python Document.get_all_words 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: magpie.base.document

클래스/타입: Document

메소드/함수: get_all_words

hotexamples.com에서의 예제들: 4

Python Document.get_all_words - 4개의 예제를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한 Python magpie.base.document.Document.get_all_words의 실제 사용 예제 중 가장 높은 평가를 받은 것들입니다. 예제를 평가해 주시면 예제의 품질 향상에 도움이 됩니다.

자주 사용되는 메소드들

보기 숨기기

Document(11)

get_all_words(3)

read_sentences(2)

예제 #1

파일 보기

  def _predict(self, doc: Document, return_float=False):
    """
    Predict a single label (or a single regression value) for a Document.

    The first ``sample_length`` words of the document are looked up in the
    word2vec model, scaled with ``self.scaler`` and written into the input
    matrix. Words that are not in the word2vec vocabulary leave their row
    filled with zeros.

    :param doc: Document object
    :param return_float: if True and the model has more than one output
      unit, convert the winning label to ``float`` before returning it
    :return: a ``float`` when the model has exactly one output unit,
      otherwise the entry of ``self.labels`` with the highest score
    """
    set_tf_growth()
    multi_input = isinstance(self.keras_model.input, list)
    if multi_input:
      _, sample_length, embedding_size = self.keras_model.input_shape[0]
    else:
      _, sample_length, embedding_size = self.keras_model.input_shape

    words = doc.get_all_words()[:sample_length]
    x_matrix = np.zeros((1, sample_length, embedding_size))

    for i, w in enumerate(words):
      if w in self.word2vec_model.wv:
        word_vector = self.word2vec_model.wv[w].reshape(1, -1)
        x_matrix[0][i] = self.scaler.transform(word_vector, copy=True)[0]

    # Multi-input models receive the same matrix once per input.
    if multi_input:
      x = [x_matrix] * len(self.keras_model.input)
    else:
      x = [x_matrix]

    with tf.device('/cpu:0'):
      y_predicted = self.keras_model.predict(x)

    # A single output unit means the raw model output is the answer.
    if self.keras_model.output_shape[1] == 1:
      return float(y_predicted[0][0])

    # TODO make this return weighted avg or max prob a param
    # Pick the label with the highest score; on ties max() keeps the
    # label that comes first in self.labels.
    best_label, _ = max(
        zip(self.labels, y_predicted[0]),
        key=lambda pair: pair[1])
    return float(best_label) if return_float else best_label

예제 #2

파일 보기

def build_x_and_y(data: DataList, **kwargs):
  """
  Build (X, y) data matrices from a collection of examples.

  :param data: sized iterable of examples; each example is indexed with
    the keys ``'text'`` and ``'label'``
  :param kwargs: additional necessary data for matrix building:
    ``label_indices`` (maps a label to its column in y),
    ``word2vec_model``, ``scaler`` and ``nn_model`` are required;
    ``regression`` is optional and defaults to False

  :return: a tuple (X, y) where X is a list holding the x matrix once per
    model input (once if ``nn_model`` is falsy or has a single input)
  """
  label_indices = kwargs['label_indices']
  word2vec_model = kwargs['word2vec_model']
  scaler = kwargs['scaler']
  nn_model = kwargs['nn_model']
  regression = kwargs.get('regression', False)

  x_matrix = np.zeros(
      (len(data), SAMPLE_LENGTH, word2vec_model.vector_size))
  if regression:
    # One float target per document. np.float64 is used because the
    # np.float_ alias no longer exists in NumPy 2.0.
    y_matrix = np.zeros((len(data), 1), dtype=np.float64)
  else:
    # One boolean column per known label.
    y_matrix = np.zeros((len(data), len(label_indices)), dtype=np.bool_)

  for doc_id, example in enumerate(data):
    words = Document(example['text']).get_all_words()[:SAMPLE_LENGTH]

    # Words missing from the word2vec vocabulary keep an all-zero row.
    for i, w in enumerate(words):
      if w in word2vec_model.wv:
        word_vector = word2vec_model.wv[w].reshape(1, -1)
        x_matrix[doc_id][i] = scaler.transform(word_vector, copy=True)[0]

    label = example['label']
    if regression:
      y_matrix[doc_id] = float(label)
    else:
      y_matrix[doc_id][label_indices[label]] = True

  if nn_model and isinstance(nn_model.input, list):
    return [x_matrix] * len(nn_model.input), y_matrix
  return [x_matrix], y_matrix

예제 #3

파일 보기

파일: input_data.py 프로젝트: slon1024/magpie

def build_x_and_y(filenames, file_directory, **kwargs):
    """
    Given file names and their directory, build (X, y) data matrices
    :param filenames: iterable of strings showing file ids (no extension)
    :param file_directory: path to a directory where those files lie
    :param kwargs: additional necessary data for matrix building:
        ``label_indices`` (maps a label to its column in y),
        ``word2vec_model``, ``scaler`` and ``nn_model`` (all required)

    :return: a tuple (X, y), or a dict ``{'input': X, 'output': y}`` when
        ``nn_model`` is a Graph; X is a list holding the x matrix once per
        model input
    """
    label_indices = kwargs['label_indices']
    word2vec_model = kwargs['word2vec_model']
    scaler = kwargs['scaler']
    nn_model = kwargs['nn_model']

    x_matrix = np.zeros((len(filenames), SAMPLE_LENGTH, EMBEDDING_SIZE))
    y_matrix = np.zeros((len(filenames), len(label_indices)), dtype=np.bool_)

    for doc_id, fname in enumerate(filenames):
        doc = Document(doc_id, os.path.join(file_directory, fname + '.txt'))
        words = doc.get_all_words()[:SAMPLE_LENGTH]

        # Words missing from the word2vec model keep an all-zero row.
        for i, w in enumerate(words):
            if w in word2vec_model:
                word_vector = word2vec_model[w].reshape(1, -1)
                x_matrix[doc_id][i] = scaler.transform(word_vector,
                                                       copy=True)[0]

        labels = get_answers_for_doc(
            fname + '.txt',
            file_directory,
            filtered_by=set(label_indices),
        )

        for lab in labels:
            y_matrix[doc_id][label_indices[lab]] = True

    # isinstance (rather than an exact type comparison) also accepts
    # subclasses of list / Graph.
    if nn_model and isinstance(nn_model.input, list):
        x_data = [x_matrix] * len(nn_model.input)
    else:
        x_data = [x_matrix]

    if isinstance(nn_model, Graph):
        return {'input': x_data, 'output': y_matrix}
    return x_data, y_matrix

예제 #4

파일 보기

파일: input_data.py 프로젝트: Rowl1ng/magpie

def build_x_and_y(filenames, file_directory, **kwargs):
    """
    Given file names and their directory, build (X, y) data matrices
    :param filenames: iterable of strings showing file ids (no extension)
    :param file_directory: path to a directory where those files lie
    :param kwargs: additional necessary data for matrix building; the keys
        ``label_indices``, ``word2vec_model``, ``scaler`` and ``nn_model``
        are all required

    :return: ``{'input': X, 'output': y}`` if ``nn_model`` is a Graph,
        otherwise the tuple (X, y). X is a list that repeats the x matrix
        once for every input of the model.
    """
    label_indices = kwargs['label_indices']
    word2vec_model = kwargs['word2vec_model']
    scaler = kwargs['scaler']
    nn_model = kwargs['nn_model']

    x_matrix = np.zeros((len(filenames), SAMPLE_LENGTH, EMBEDDING_SIZE))
    y_matrix = np.zeros((len(filenames), len(label_indices)), dtype=np.bool_)

    for doc_id, fname in enumerate(filenames):
        text_name = fname + '.txt'
        doc = Document(doc_id, os.path.join(file_directory, text_name))

        # Only the first SAMPLE_LENGTH words are used; a word unknown to
        # the word2vec model leaves its row at zero.
        for i, w in enumerate(doc.get_all_words()[:SAMPLE_LENGTH]):
            if w in word2vec_model:
                word_vector = word2vec_model[w].reshape(1, -1)
                x_matrix[doc_id][i] = scaler.transform(word_vector, copy=True)[0]

        answers = get_answers_for_doc(
            text_name,
            file_directory,
            filtered_by=set(label_indices),
        )

        # Mark the column of every label attached to this document.
        for lab in answers:
            y_matrix[doc_id][label_indices[lab]] = True

    # Use isinstance so subclasses of list / Graph are recognised too.
    has_many_inputs = bool(nn_model) and isinstance(nn_model.input, list)
    copies = len(nn_model.input) if has_many_inputs else 1
    x_data = [x_matrix] * copies

    if isinstance(nn_model, Graph):
        return {'input': x_data, 'output': y_matrix}
    return x_data, y_matrix