예제 #1
0
def insert_comp_art(organization, article):
    """Link an article to the company named in an organization tag.

    The company code is the text before the first ':' in ``organization``
    (stripped and upper-cased) and is resolved with ``get_company_by_code``.
    When a company is found, a ``CompanyArticle`` row is added and committed.
    The first two entries of ``article.NS`` (split on '|') whose code part
    starts with ``c<digits>`` supply ``main_category`` and ``sub_category``
    respectively, taken from the text after the first ':' of each entry.

    Args:
        organization: String whose first ':'-separated field is the
            company code.
        article: Object exposing ``id`` and ``NS`` (``NS`` may be ``None``).

    Returns:
        None in every case; nothing is stored when no company matches.

    Raises:
        Exception: Any error is logged, the session is rolled back, and the
            error is re-raised to the caller.
    """
    session = None
    try:
        session = Session()
        code = organization.split(':')[0].strip().upper()
        company = get_company_by_code(code)
        if company is None:
            return None
        com_art = CompanyArticle(gvkey=company.gvkey, article_id=article.id)
        ns_list = article.NS.split('|') if article.NS is not None else []
        matched = 0
        for cat in ns_list:
            cat_list = cat.split(':')
            # Only entries whose code looks like "c<digits>" are categories.
            if re.match(r'c\d+', cat_list[0].strip()) is None:
                continue
            matched += 1
            if matched == 1:
                com_art.main_category = cat_list[1].strip()
            else:
                com_art.sub_category = cat_list[1].strip()
                # Both slots are filled; later matches were ignored anyway.
                break
        session.add(com_art)
        session.commit()
        logger.info('Matched company %s to article %s', company.factiva_name,
                    com_art.article_id)
    except Exception:
        logger.exception('message')
        if session is not None:
            session.rollback()
        raise
    finally:
        # Runs on success, on the early return and on errors, so the
        # session is never left open.
        if session is not None:
            session.close()
예제 #2
0
def process_file(fname):
    """Parse one RTF file and store any articles not yet in the database.

    The file is read from ``settings.RTF_DIR``, converted with ``striprtf``
    and split into article dicts by ``parser``. Each dict whose ``'id'`` is
    not already present in ``Articles`` is inserted and committed
    individually, so earlier inserts survive a later failure.

    Args:
        fname: File name relative to ``settings.RTF_DIR``. Names starting
            with ``'~$'`` are skipped (presumably editor lock files --
            confirm).

    Returns:
        None. The function also returns early, after logging, when the file
        is skipped, cannot be read, or yields no articles.

    Raises:
        Exception: Errors from parsing or the database propagate after the
            session has been rolled back and closed.
    """
    file_location = os.path.join(settings.RTF_DIR, fname)
    logger.info('Opening file %s...', fname)
    if fname.startswith('~$'):
        return None
    try:
        with open(file_location, 'rb') as rtf_file:
            txt = rtf_file.read()
    except OSError:
        logger.warning('Cannot read from file %s', fname)
        # Without the file contents there is nothing to parse; continuing
        # would fail on the unbound ``txt``.
        return None
    clean_text = striprtf(txt)
    dicts = parser(clean_text, fname)
    if not dicts:
        logger.error('Cannot extract articles from file %s', fname)
        return None
    logger.info('Found %d articles in file %s', len(dicts), fname)
    # Open the session only once there is something to store, so the early
    # returns above cannot leak it.
    session = Session()
    try:
        for dict_item in dicts:
            article = (
                session.query(Articles).filter_by(id=dict_item['id']).first()
            )
            if article:
                logger.info('Article %s already exists in database',
                            article.id)
                continue
            session.add(Articles(**dict_item))
            session.commit()
    except Exception:
        session.rollback()
        raise
    finally:
        session.close()
    logger.info('Finished parsing file %s...', fname)
예제 #3
0
def create_post(author_id, title, description, body):
    """Create and persist a ``Post`` stamped with the current UTC time.

    Args:
        author_id: Passed through to ``Post`` as its first argument.
        title: Post title.
        description: Post description.
        body: Post body.

    Returns:
        ``successful(post.id)`` when the commit succeeds; otherwise the
        module-level ``internal_server_error`` object, returned as-is after
        the session has been rolled back.
    """
    session = Session()
    try:
        created_at = strftime("%Y-%m-%dT%H:%M:%S", gmtime())
        post = Post(author_id, title, description, body, created_at)
        session.add(post)
        session.commit()
        return successful(post.id)
    except Exception:
        # Only ordinary errors become an error response; KeyboardInterrupt
        # and SystemExit propagate instead of being swallowed.
        session.rollback()
        return internal_server_error
    finally:
        session.close()
예제 #4
0
def get_articles():
    """Analyse every article that has no ``Analysis`` row yet.

    For each ``Articles`` row without an ``Analysis`` row of the same id,
    the article text is passed to ``get_data``; the resulting dict (minus
    its ``'doc_size'`` entry, plus the article id) is stored as a new
    ``Analysis`` row. All new rows are committed together at the end.

    Returns:
        None.

    Raises:
        Exception: Any error propagates after the session has been rolled
            back and closed.
    """
    suffix = 'None'
    session = Session()
    try:
        for article in session.query(Articles).all():
            analysed = session.query(Analysis).filter_by(id=article.id).first()
            if analysed:
                continue
            logger.info('Analysing %s', article.id)
            text = article.text
            # Remove a literal trailing "None". str.rstrip('None') would
            # instead strip any trailing run of the characters N/o/n/e and
            # damage ordinary words such as "everyone".
            while text.endswith(suffix):
                text = text[:-len(suffix)]
            analyzed_dict = get_data(text)
            analyzed_dict['id'] = article.id
            analyzed_dict.pop('doc_size')
            session.add(Analysis(**analyzed_dict))
        session.commit()
    except Exception:
        session.rollback()
        raise
    finally:
        session.close()
예제 #5
0
def new_user(email, name, password_encrypted, token):
    """Create and persist a ``User``.

    Args:
        email: User e-mail address.
        name: User name.
        password_encrypted: Already-encrypted password, stored as given.
        token: Token stored with the user.

    Returns:
        ``successful({...})`` carrying the new user's id, email, encrypted
        password and token when the commit succeeds; otherwise the
        module-level ``internal_server_error`` object, returned as-is after
        the session has been rolled back.
    """
    session = Session()
    try:
        user = User(email, name, password_encrypted, token)
        session.add(user)
        session.commit()
        # NOTE(review): the payload exposes the encrypted password and the
        # token; kept unchanged because callers may rely on these keys.
        payload = {
            'id': user.id,
            'email': user.email,
            'password_encrypted': user.password_encrypted,
            'token': user.token,
        }
        return successful(payload)
    except Exception:
        # Only ordinary errors become an error response; KeyboardInterrupt
        # and SystemExit propagate instead of being swallowed.
        session.rollback()
        return internal_server_error
    finally:
        session.close()
예제 #6
0
def import_data(path):
    """Load companies from a ';'-delimited CSV file into the database.

    The first non-blank row is treated as the header and only printed. Each
    following row becomes a ``Company`` built from its first four columns
    (gvkey, name, factiva_name, factiva_code). All rows are committed in a
    single transaction once the file has been read.

    Args:
        path: Path of the CSV file to import.

    Returns:
        None. The number of lines processed (header included) is printed.

    Raises:
        Exception: Any error propagates after the session has been rolled
            back and closed; nothing is committed in that case.
    """
    # newline='' lets the csv module handle line endings itself, as the csv
    # documentation requires for files passed to csv.reader.
    with open(path, newline='') as f:
        reader = csv.reader(f, delimiter=';')
        line_count = 0
        session = Session()
        try:
            for row in reader:
                if not row:
                    # Blank line: nothing to import.
                    continue
                if line_count == 0:
                    print(f'Column names are {", ".join(row)}')
                else:
                    session.add(Company(
                        gvkey=row[0],
                        name=row[1],
                        factiva_name=row[2],
                        factiva_code=row[3],
                    ))
                # Count every processed row, not just the header.
                line_count += 1
            session.commit()
        except Exception:
            session.rollback()
            raise
        finally:
            session.close()
        print(f'Processed {line_count} lines.')