def main(args):
    """Predict and print the subtopics of every input file.

    ``args`` is handed to ``parseargs``; the resulting map is read with
    the following keys (where a single value is needed, the first element
    of the stored value is used):

    * ``files``   -- files to process; exits with status 0 when absent.
    * ``topic``   -- topic name; defaults to "IT Assessment".
    * ``summary`` -- summary file (required).
    * ``tmodel``  -- topic model file (required).
    * ``smodel``  -- subtopic model file (required).

    When a required option is missing a message is printed and the
    process exits with status 1.  For each file, the mapped subtopics are
    printed as indented JSON, followed by a separator line and the
    subtopic column names.
    """
    argsmap = parseargs(args)

    files = argsmap.get('files')
    if not files:
        # Nothing to process is not treated as an error.
        sys.exit(0)

    topic = argsmap.get("topic")
    topic = topic[0] if topic else "IT Assessment"

    summaryfile = argsmap.get("summary")
    if not summaryfile:
        print("Summary file must be specified...")
        sys.exit(1)
    summary_map = find_topics.get_summary_map(summaryfile[0])

    tmodelfile = argsmap.get("tmodel")
    if not tmodelfile:
        # The option is looked up under the key "tmodel", so the message
        # names --tmodel (the previous text said --tmmodel).
        print('Topic Model must be specified using --tmodel ...')
        sys.exit(1)
    (origmap, sorted_y, vectorizer, le,
     grid_search) = read_model_file(tmodelfile[0])
    # NOTE(review): the result is not used below; the call is kept in case
    # toc_entries has side effects -- confirm and drop if it is pure.
    topics = find_topics.toc_entries(origmap)
    mapper = Mapper(origmap, sorted_y, vectorizer, le, grid_search)

    subtopicReader = SubtopicReader(topic, mapper, summary_map)

    smodelfile = argsmap.get("smodel")
    if not smodelfile:
        print('Subtopic Model must be specified using --smodel ...')
        sys.exit(1)
    subtopicPredictor = SubtopicPredictor(smodelfile[0])

    for filename in files:
        subtopic_dict = subtopicReader.mapped_subtopics(
            filename, subtopicPredictor)
        subtopic_columns = subtopicReader.get_column_names(subtopicPredictor)
        print(json.dumps(subtopic_dict, indent=2))
        print('---------------------------------------')
        print(subtopic_columns)
Example #2
0
def main(args):
  """Read the subtopics of all input files and optionally summarize them.

  Options are taken from the map returned by ``parseargs(args)``:

  * ``files``         -- files to read; exits with status 0 when absent.
  * ``topic``         -- topic name; defaults to "IT Assessment".
  * ``summary``       -- summary file (required, else exit status 1).
  * ``model``         -- model file (required, else exit status 1).
  * ``print_detail``  -- when the key is present, passed as True to
                         ``SubtopicReader.read_all_files``.
  * ``print_summary`` -- when the key is present, ``print_summary()`` is
                         called on the reader afterwards.
  """
  options = parseargs(args)

  input_files = options.get('files')
  if not input_files:
    sys.exit(0)

  topic_values = options.get("topic")
  topic = topic_values[0] if topic_values else "IT Assessment"

  summary_values = options.get("summary")
  if not summary_values:
    print("Summary file must be specified...")
    sys.exit(1)
  summary_map = find_topics.get_summary_map(summary_values[0])

  model_values = options.get("model")
  if not model_values:
    print('Model must be specified...')
    sys.exit(1)
  origmap, sorted_y, vectorizer, le, grid_search = read_model_file(
      model_values[0])
  topics = find_topics.toc_entries(origmap)
  mapper = Mapper(origmap, sorted_y, vectorizer, le, grid_search)

  reader = SubtopicReader(topic, mapper, summary_map)
  show_detail = 'print_detail' in options.keys()
  all_subtopics = reader.read_all_files(input_files, show_detail)
  if 'print_summary' in options.keys():
    reader.print_summary()
Example #3
0
def main(args):
  """Parse the command-line options and run ``get_headers_for_files``.

  Options are taken from the map returned by ``parseargs(args)``:

  * ``files``   -- files to process; exits with status 0 when absent.
  * ``summary`` -- summary file (required, else exit status 1).
  * ``model``   -- model file (required, else exit status 1).
  * ``nosplit`` -- treated as set only when its value equals ``[]``; the
                   split count is then 0.
  * ``split``   -- split count (converted with ``int``); defaults to 4.
  * ``NL``      -- when non-empty, stored in NEWLINE_WITHIN_COLUMN.
  * ``out``     -- output file, passed through to the worker.
  * ``err``     -- exception file (required, else exit status 1).
  * ``rmap``    -- ratings mapper file (required, else exit status 1).
  * ``fd``      -- when non-empty, stored in CSV_FIELD_DELIMITER.
  * ``fdr``     -- when non-empty, stored in FD_REPLACED.
  * ``smodels`` / ``stopics`` -- passed through unchanged.
  """
  global NEWLINE_WITHIN_COLUMN, CSV_FIELD_DELIMITER, FD_REPLACED

  options = parseargs(args)

  def first_or_raw(key):
    # First element of the option's value when it is non-empty; otherwise
    # the raw (falsy) value exactly as stored in the options map.
    values = options.get(key)
    return values[0] if values else values

  input_files = options.get('files')
  if not input_files:
    sys.exit(0)

  summary_values = options.get("summary")
  if not (summary_values and len(summary_values) != 0):
    print('Summary file must be specified...')
    sys.exit(1)
  summary_map = get_summary_map(summary_values[0])

  model_values = options.get("model")
  if not model_values:
    print('Model must be specified...')
    sys.exit(1)
  origmap, sorted_y, vectorizer, le, grid_search = read_model_file(
      model_values[0])
  topics = toc_entries(origmap)
  mapper = Mapper(origmap, sorted_y, vectorizer, le, grid_search)

  # The flag counts as given only when it was stored as an empty list.
  nosplit = bool(options.get('nosplit') == [])

  if nosplit:
    topic_split_times = 0
  else:
    split_values = options.get('split')
    topic_split_times = int(split_values[0]) if split_values else 4

  # Newline character used for multiline columns.
  newline_char = first_or_raw('NL')
  if newline_char:
    NEWLINE_WITHIN_COLUMN = newline_char

  outfile = first_or_raw("out")

  exfile = first_or_raw("err")
  if not exfile:
    print("Exception file name must be entered using the --err option...")
    sys.exit(1)

  ratings_mapper_file = first_or_raw("rmap")
  if not ratings_mapper_file:
    print("Ratings Mapper File file name must be entered using the --rmap option...")
    sys.exit(1)

  ratings = Ratings(ratings_mapper_file)

  delimiter = first_or_raw('fd')
  if delimiter:
    CSV_FIELD_DELIMITER = delimiter

  delimiter_replacement = first_or_raw('fdr')
  if delimiter_replacement:
    FD_REPLACED = delimiter_replacement

  smodels = options.get("smodels")
  stopics = options.get("stopics")
  get_headers_for_files(input_files, topics, mapper, summary_map, outfile,
                        exfile, nosplit, topic_split_times, ratings,
                        smodels, stopics)
Example #4
0
#!/usr/bin/env python

import sys
from predict_using_toc_mapper import Mapper, get_topic, read_model_file
from find_topics import toc_entries, get_summary_map, read_topics
import csv


def print_topics(topics):
    """Print every topic on its own line with a trailing '*' appended."""
    marked_topics = (name + '*' for name in topics)
    for marked in marked_topics:
        print(marked)


if __name__ == '__main__':
    # Usage: <script> MODEL_FILE [OUTPUT_CSV]
    #
    # Prints every topic of the model (sorted case-insensitively, each
    # followed by '*').  When OUTPUT_CSV is given, additionally writes one
    # row "<original>,OK,<mapped>" per entry of the model's origmap.
    if len(sys.argv) < 2:
        # Fail with a usage hint instead of a bare IndexError traceback.
        sys.exit('Usage: {} MODEL_FILE [OUTPUT_CSV]'.format(sys.argv[0]))

    modelfile = sys.argv[1]
    (origmap, sorted_y, vectorizer, le,
     grid_search) = read_model_file(modelfile)
    topics = toc_entries(origmap)
    # NOTE(review): mapper is not used below; construction is kept in case
    # Mapper() has side effects -- confirm and drop if it does not.
    mapper = Mapper(origmap, sorted_y, vectorizer, le, grid_search)
    sorted_topics = sorted(topics, key=lambda x: x.lower())
    print_topics(sorted_topics)
    if len(sys.argv) > 2:
        # newline='' is what the csv module expects for its file objects
        # (otherwise extra blank lines appear on some platforms); the
        # with-block closes the file even if writing a row fails.
        with open(sys.argv[2], 'w', newline='') as f:
            writer = csv.writer(f)
            writer.writerows(
                [orig, "OK", mapped] for orig, mapped in origmap.items())