예제 #1
0
def _handle_plot_parser(args):
    """Validate the plot command's file arguments and plot the predictions.

    Every path in ``args.input`` and, when one is given, ``args.population``
    must refer to an existing file. If any of them does not, an error naming
    the offending path is logged and the process exits with status 1.

    A supplied population path is replaced on ``args`` by the result of
    ``_parse_population`` before being passed on to
    ``stats.plot_predictions``.
    """
    for path in args.input:
        if not os.path.isfile(os.path.abspath(path)):
            LOGGER.error(f"File {path} does not exist.")
            sys.exit(1)
    if args.population:
        if not os.path.isfile(os.path.abspath(args.population)):
            # Name the population file itself. The loop variable from the
            # input check would point at the last (valid) input file, or be
            # undefined altogether when args.input is empty.
            LOGGER.error(f"File {args.population} does not exist.")
            sys.exit(1)
        args.population = _parse_population(args.population)

    stats.plot_predictions(args.input, args.alpha_level, args.output,
                           args.width, args.fill, args.patterns,
                           args.population)
예제 #2
0
def _handle_analyze_parser(args):
    """Validate the analyze command's arguments and run the classification.

    An error is logged and the process exits with status 1 if
    ``args.rows_per_file`` is not positive or if ``args.input`` is not an
    existing file. Otherwise the input, output and senti4sd pool root paths
    are made absolute and handed to ``classify.classify_sentiment``.
    """
    inpath = os.path.abspath(args.input)

    # The row count is checked first, so its message wins if both are invalid.
    if args.rows_per_file <= 0:
        problem = f"At least 1 row per file is required."
    elif not os.path.isfile(inpath):
        problem = f"File {inpath} does not exist!"
    else:
        problem = None

    if problem is not None:
        LOGGER.error(problem)
        sys.exit(1)

    destination = os.path.abspath(args.output)
    pool_root = os.path.abspath(args.senti4sd_pool_root)
    classify.classify_sentiment(
        args.rows_per_file,
        pool_root,
        inpath,
        destination,
    )
def _try_commit_and_flush():
    """Commit and then flush ``_session``, rolling back on any failure.

    An exception raised by the commit or the flush is logged as an error and
    followed by a rollback of the session.

    Return True if both calls succeeded, False otherwise.
    """
    succeeded = True
    try:
        _session.commit()
        _session.flush()
    except Exception as e:
        succeeded = False
        LOGGER.error(
            f"Unexpected exception:\n{type(e).__name__}: {str(e)}\nRolling back"
        )
        _session.rollback()
    return succeeded
async def _async_classify_sentiment(path_to_classifier: str, inpath: str,
                                    outpath: str):
    """Classify ``inpath`` by running the classifier script in a subprocess.

    The script at ``path_to_classifier`` is executed through ``/bin/bash``
    with its own directory as working directory. It receives the absolute
    input path and the base name of ``outpath``. On success the result is
    expected in the script's directory under that base name and is moved to
    ``outpath``.

    Raises ClassificationError if the script exits with a non-zero status,
    after logging the contents of ``inpath``.
    """
    classifier_dir, script = os.path.split(path_to_classifier)
    result_name = os.path.basename(outpath)

    # The script is invoked by its bare name, hence cwd must be its directory.
    argv = ('/bin/bash', script, os.path.abspath(inpath), result_name)
    proc = await asyncio.create_subprocess_exec(*argv, cwd=classifier_dir)
    await proc.communicate()

    if proc.returncode == 0:
        shutil.move(os.path.join(classifier_dir, result_name), outpath)
        return

    # Failure: dump the offending input to the log before raising.
    with open(inpath, 'r') as f:
        LOGGER.error(
            f"Failed to classify {inpath} containing: {''.join(f.readlines())}"
        )
    raise ClassificationError(f"Classifying {inpath} failed.")
예제 #5
0
def _post_xml_row_to_model(elem,
                           question_ids: Set[int] = None,
                           target_post_type: PostType = PostType.QUESTION):
    """Build a Post model from a row of the Posts.xml file.

    The body text is sanitized before the model is created.

    Return None instead of a model when:
      * the row's PostTypeId cannot be converted to a PostType,
      * the row's post type differs from target_post_type,
      * target_post_type is PostType.ANSWER and the row's ParentId is not in
        question_ids (question_ids is only consulted in this case), or
      * sanitizing the body raises ValueError (this is logged as an error).
    """
    try:
        post_type = PostType(int(elem.attrib['PostTypeId']))
    except ValueError:
        # PostTypeId is not a post type that is modelled
        return None

    if target_post_type != post_type:
        return None

    wants_answers = target_post_type == PostType.ANSWER
    if wants_answers and int(elem.attrib['ParentId']) not in question_ids:
        return None

    try:
        sanitized = sanitize_post(elem.attrib['Body'])
    except ValueError:
        LOGGER.error(
            f"Sanitization failed for Post with Id={elem.attrib['Id']}")
        return None

    created = MayaDT.from_rfc3339(elem.attrib['CreationDate']).date

    # Title and tags are only read for questions, the parent only for answers.
    is_answer = post_type == PostType.ANSWER
    title = None if is_answer else elem.attrib['Title']
    tags = None if is_answer else elem.attrib['Tags']
    parent_id = elem.attrib['ParentId'] if is_answer else None

    return Post(id=elem.attrib['Id'],
                creation_date=created,
                post_type_id=post_type.value,
                title=title,
                text=sanitized,
                tags=tags,
                parent_id=parent_id)
예제 #6
0
def _comment_xml_row_to_model(elem, post_ids: Set[int]):
    """Build a Comment model from a row of the Comments.xml file.

    The comment text is sanitized before the model is created.

    Return None if the row's PostId is not contained in post_ids, or if
    sanitizing the text raises an exception (which is logged as an error).
    """
    parent_post_id = int(elem.attrib['PostId'])
    if parent_post_id not in post_ids:
        return None

    try:
        clean_text = sanitize_comment(elem.attrib['Text'])
    except Exception as e:
        LOGGER.error(
            f"Sanitization failed for Comment with Id={elem.attrib['Id']}\n"
            f"{type(e).__name__}\n{str(e)}")
        return None

    created = MayaDT.from_rfc3339(elem.attrib['CreationDate']).date
    return Comment(id=elem.attrib['Id'],
                   creation_date=created,
                   text=clean_text,
                   post_id=parent_post_id)
예제 #7
0
def log_exception(pre_msg, e):
    """Log pre_msg, followed on a new line by the type name and message of
    the exception e, as an error.
    """
    message = f"{pre_msg}\n{type(e).__name__}: {str(e)}"
    LOGGER.error(message)