Example #1
0
def import_results(args):
    """
    Read topic data generated from the learn module and store it in the
    database.

    A new ResultSet is built from ``args.alpha``, ``args.beta`` and
    ``args.iterations`` and committed immediately. Every non-blank data row
    of ``args.topic_file`` is added to the session as a Topic belonging to
    that result set. Every non-blank data row of ``args.course_file`` whose
    course id (second column) resolves to a Course gets one
    CourseTopicAssociation per ``id:weight`` entry whose weight is greater
    than ``TRJ.TOPIC_MIN_WEIGHT``.

    Apart from the initial ResultSet commit, no commit is issued here; the
    topics and associations are only added to the session.

    Args:
        args: Parsed command line arguments providing ``topic_file``,
            ``course_file``, ``alpha``, ``beta`` and ``iterations``.
    """

    from trajectory.models import Course, Topic, CourseTopicAssociation
    from trajectory.models import ResultSet
    from trajectory.models.meta import session
    from trajectory import config as TRJ
    import csv
    import logging
    log = logging.getLogger("root")
    log.info("Begin topic import.")

    # Create a new result set. It is committed up front; result_set.id is
    # read further down when the associations are built (presumably the
    # commit is what populates it -- confirm against the model).
    result_set = ResultSet(
        alpha=args.alpha,
        beta=args.beta,
        iterations=args.iterations
    )
    session.add(result_set)
    session.commit()

    # Add in new topic definitions. newline="" lets the csv module do its
    # own newline handling, as its documentation requires.
    topic_count = 0
    with open(args.topic_file, "r", newline="") as topic_file:
        topic_reader = csv.reader(topic_file, delimiter=",")
        next(topic_reader, None) # skip header
        for topic in topic_reader:
            if not topic:
                continue # csv yields [] for a blank line; nothing to import
            topic_count += 1
            session.add(Topic(
                id=topic[0],
                result_set=result_set,
                words=', '.join(topic[1:])
            ))
    result_set.num_topics = topic_count

    # Add the topics to their courses.
    # NOTE(review): `courses` is never read again. Presumably it keeps every
    # Course loaded in the session so the per-row lookups below do not each
    # hit the database -- confirm before removing it.
    courses = session.query(Course).all()
    course_query = session.query(Course)
    topics_to_add = {} # {course:[[id, weight], [id, weight], ...]}
    with open(args.course_file, "r", newline="") as course_file:
        course_reader = csv.reader(course_file, delimiter=",")
        next(course_reader, None) # skip header
        for row in course_reader:
            if not row:
                continue # blank line in the file
            # Resolve the course once per row; rows naming a course that is
            # not in the database are ignored.
            course = course_query.get(row[1])
            if course is None:
                continue
            # Keep only topics above the configured minimum weight. A later
            # row for the same course replaces an earlier one.
            topics_to_add[course] = [
                pair for pair in (topic.split(':') for topic in row[2:])
                if float(pair[1]) > TRJ.TOPIC_MIN_WEIGHT
            ]

    for course, topic_list in topics_to_add.items():
        for (topicid, proportion) in topic_list:
            association = CourseTopicAssociation(proportion=proportion)
            association.topic_id = topicid
            association.result_set_id = result_set.id
            course.topics.append(association)

    log.info("Topic import complete.")
Example #2
0
def main():
    """
    Handle basic command line argument parsing & configure logging. Route
    logic depending on what the user wants to do.

    Three subcommands are defined: ``download`` (handled by ``scrape``),
    ``export`` (handled by ``export``) and ``import-results`` (handled by
    ``import_results``). After the selected handler returns, the session is
    committed.

    This function never returns normally: it always ends by calling
    ``sys.exit``. The exit status is 0 when the handler and the commit both
    completed, and 1 when either of them raised. Argument parsing happens
    before the guarded section, so argparse's own exits (``--version``,
    invalid arguments) are not intercepted here.
    """

    # Create top-level command line argument parser.
    parser = ArgumentParser(description=TRJ.PROGRAM_DESC, prog=TRJ.PROGRAM_NAME)
    parser.add_argument("--version", action="version", version=TRJ.PROGRAM_VERSION)
    parser.add_argument("--debug", action="store_true")
    subparsers = parser.add_subparsers(dest="command",
        help="Either download new data or export existing data to disk.")

    # Create arguments for scraping.
    download_parser = subparsers.add_parser("download",
            help="Download data from the Web.")
    download_parser.add_argument("targets", choices=engines.list(),
            nargs="+",
            help="Scraping targets, select one or more.")
    download_parser.add_argument("--cs",
            help="Shortcut for just CS departments.",
            action="store_true")

    # Create arguments for exporting.
    export_parser = subparsers.add_parser("export",
            help="Export data to disk for analysis.")
    export_parser.add_argument("--data-directory",
            default="data",
            help="The export directory (default: 'data').",
            action="store")
    export_parser.add_argument("--departments",
            nargs="+",
            help="The departments to export.",
            action="store")
    export_parser.add_argument("--cs",
            help="Shortcut for just CS departments.",
            action="store_true")

    # Create arguments for importing topics.
    import_parser = subparsers.add_parser("import-results",
            help="Import learned topics to the database.")
    import_parser.add_argument("--topic-file",
            required=True,
            help="The stored topic key file from the learn module.",
            action="store")
    import_parser.add_argument("--course-file",
            required=True,
            help="The stored document key file from the learn module.",
            action="store")
    import_parser.add_argument("--alpha", required=False, action="store",
            help="Alpha value used in this run.")
    import_parser.add_argument("--beta", required=False, action="store",
            help="Beta value used in this run.")
    import_parser.add_argument("--iterations", required=False, action="store",
            help="Number of iterations used in this run.")

    # Parse command line arguments.
    args = parser.parse_args(sys.argv[1:])

    # Start up the program.
    log = trajectory.log.global_logger("root", debug=args.debug)
    log.info("Beginning trj-scrape.")

    # Assume failure until the selected command and the commit both succeed,
    # so that an error is visible to the calling shell via the exit status.
    exit_code = 1

    # Wrap main control flow in a try/catch for safety.
    try:

        # Hand off control flow to export module.
        if args.command == "export":
            export(args)

        # Hand off control flow to scraper module.
        elif args.command == "download":
            scrape(args)

        # Hand off control to the import module.
        elif args.command == "import-results":
            import_results(args)

        # Otherwise no command was selected
        else:
            log.info("No command specified.")

        # Store any modifications to the database.
        session.commit()
        exit_code = 0

    # Handle any unknown errors gracefully.
    except Exception as error:

        log.error("Unknown error encountered.")
        log.error(error)
        if args.debug:
            traceback.print_exc()

    # Shut down safely.
    finally:

        # Exit the program. Raising SystemExit from the finally clause
        # replaces any exception still in flight, so the process always
        # ends through this path once the session has been closed.
        log.info("Exiting.")
        session.close()
        sys.exit(exit_code)
Example #3
0
def main():
    """
    Handle basic command line argument parsing & configure logging. Route
    logic depending on what the user wants to do.

    Three subcommands are defined: ``download`` (handled by ``scrape``),
    ``export`` (handled by ``export``) and ``import-results`` (handled by
    ``import_results``). After the selected handler returns, the session is
    committed.

    This function never returns normally: it always ends by calling
    ``sys.exit``. The exit status is 0 when the handler and the commit both
    completed, and 1 when either of them raised. Argument parsing happens
    before the guarded section, so argparse's own exits (``--version``,
    invalid arguments) are not intercepted here.
    """

    # Create top-level command line argument parser.
    parser = ArgumentParser(description=TRJ.PROGRAM_DESC,
                            prog=TRJ.PROGRAM_NAME)
    parser.add_argument("--version",
                        action="version",
                        version=TRJ.PROGRAM_VERSION)
    parser.add_argument("--debug", action="store_true")
    subparsers = parser.add_subparsers(
        dest="command",
        help="Either download new data or export existing data to disk.")

    # Create arguments for scraping.
    download_parser = subparsers.add_parser("download",
                                            help="Download data from the Web.")
    download_parser.add_argument("targets",
                                 choices=engines.list(),
                                 nargs="+",
                                 help="Scraping targets, select one or more.")
    download_parser.add_argument("--cs",
                                 help="Shortcut for just CS departments.",
                                 action="store_true")

    # Create arguments for exporting.
    export_parser = subparsers.add_parser(
        "export", help="Export data to disk for analysis.")
    export_parser.add_argument("--data-directory",
                               default="data",
                               help="The export directory (default: 'data').",
                               action="store")
    export_parser.add_argument("--departments",
                               nargs="+",
                               help="The departments to export.",
                               action="store")
    export_parser.add_argument("--cs",
                               help="Shortcut for just CS departments.",
                               action="store_true")

    # Create arguments for importing topics.
    import_parser = subparsers.add_parser(
        "import-results", help="Import learned topics to the database.")
    import_parser.add_argument(
        "--topic-file",
        required=True,
        help="The stored topic key file from the learn module.",
        action="store")
    import_parser.add_argument(
        "--course-file",
        required=True,
        help="The stored document key file from the learn module.",
        action="store")
    import_parser.add_argument("--alpha",
                               required=False,
                               action="store",
                               help="Alpha value used in this run.")
    import_parser.add_argument("--beta",
                               required=False,
                               action="store",
                               help="Beta value used in this run.")
    import_parser.add_argument("--iterations",
                               required=False,
                               action="store",
                               help="Number of iterations used in this run.")

    # Parse command line arguments.
    args = parser.parse_args(sys.argv[1:])

    # Start up the program.
    log = trajectory.log.global_logger("root", debug=args.debug)
    log.info("Beginning trj-scrape.")

    # Assume failure until the selected command and the commit both succeed,
    # so that an error is visible to the calling shell via the exit status.
    exit_code = 1

    # Wrap main control flow in a try/catch for safety.
    try:

        # Hand off control flow to export module.
        if args.command == "export":
            export(args)

        # Hand off control flow to scraper module.
        elif args.command == "download":
            scrape(args)

        # Hand off control to the import module.
        elif args.command == "import-results":
            import_results(args)

        # Otherwise no command was selected
        else:
            log.info("No command specified.")

        # Store any modifications to the database.
        session.commit()
        exit_code = 0

    # Handle any unknown errors gracefully.
    except Exception as error:

        log.error("Unknown error encountered.")
        log.error(error)
        if args.debug:
            traceback.print_exc()

    # Shut down safely.
    finally:

        # Exit the program. Raising SystemExit from the finally clause
        # replaces any exception still in flight, so the process always
        # ends through this path once the session has been closed.
        log.info("Exiting.")
        session.close()
        sys.exit(exit_code)