Example #1
def main():
    try:
        file_path = sys.argv[1]
        csv_obj = CSV(file_path)
        stock_list = csv_obj.stock_list
        resp = 'y'
        while resp.lower() == 'y':
            try:
                stock_name = input(
                    "Welcome Agent! Which stock do you need to process? ")
                is_exact_match, prefixed_words = csv_obj.t.list_words(
                    stock_name)
                stock_name = input_stock_name(prefixed_words, stock_name,
                                              is_exact_match)
                if stock_name in stock_list:
                    print('Processing for ' + stock_name)
                    start_date = parse_date(
                        input("From which date do you want to start? "))
                    end_date = parse_date(
                        input("Till which date do you want to analyze? "))
                    selected_stocks = select_stocks(stock_list[stock_name],
                                                    start_date, end_date)
                    mean_value, std = calculate_mean_std(selected_stocks)
                    buy_date, sell_date, profit = find_profit(selected_stocks)
                    print("Here is you result " + "Mean: " + str(mean_value) +
                          ", Std: " + str(std) + ", Buy date: " +
                          str(buy_date) + ", Sell date: " + str(sell_date) +
                          ", Profit: Rs. " + str(profit))
                else:
                    print("stock not found")
            except Exception as e:
                print("some error Occurred", e)
            resp = input("Do you want to continue? (y or n) ")
    except Exception as e:
        print("Could not process file:", e)
Example #2
def get_issue_date(issue_id):
    issue_url = '%s%s' % (ISSUE_URL, issue_id)
    response = get_url(issue_url)
    page = BeautifulSoup(response.read())
    issue_date = page.find('div', 'issue').strong.string
    issue_datetime = parse_date(issue_date)
    return issue_datetime
Example #3
def check_csv(file_name, year=None, exclude=[]):
    '''
    Check for missing editorials
    year: check every day in the specified year
    exclude: list of days to exclude from checking (eg. to exclude Sunday [6])
    '''
    articles = csv.reader(open(file_name, 'rb'), delimiter=',', quotechar='"')
    article_dates = []
    missing_dates = []
    for article in articles:
        # Get the date
        date = parse_date(article[6])
        article_dates.append(date)
    article_dates.sort()
    duplicates = find_duplicates(article_dates)
    print 'Duplicates: %s' % len(duplicates)
    if year:
        start_date = datetime.date(year, 1, 1)
        end_date = datetime.date(year, 12, 31)
    else:
        start_date = article_dates[0]
        end_date = article_dates[-1]
    one_day = datetime.timedelta(days=1)
    this_day = start_date
    # Loop through each day in specified period to see if there's an article
    # If not, add to the missing_dates list.
    while this_day <= end_date:
        if this_day.weekday() not in exclude:  #exclude Sunday
            if this_day not in article_dates:
                missing_dates.append(this_day)
        this_day += one_day
    print 'Missing: %s' % len(missing_dates)
    csv_out = csv.DictWriter(open(file_name, 'ab'),
                             extrasaction='ignore',
                             fieldnames=[
                                 'id', 'title', 'url', 'newspaper_title',
                                 'newspaper_details', 'newspaper_id',
                                 'issue_date', 'page', 'page_url',
                                 'corrections', 'ftext'
                             ],
                             dialect=csv.excel)
    # Write a results file with nicely formatted dates
    with open(os.path.join(os.path.dirname(file_name), 'csv_check.html'),
              'wb') as results:
        results.write(
            '<html>\n<head>\n  <title>Results</title>\n</head>\n<body>')
        results.write('<h2>Duplicates:</h2>\n<table>\n')
        for dup in duplicates:
            results.write(
                '<tr><td>%s</td><td><a href="%s">View issue</a></td></tr>\n' %
                (format_date(dup), get_issue_url(dup, '35')))
        results.write('</table>\n')
        results.write('<h2>Missing:</h2>\n<table>\n')
        for missing in missing_dates:
            results.write(
                '<tr><td>%s</td><td><a href="%s">View issue</a></td></tr>\n' %
                (format_date(missing), get_issue_url(missing, '35')))
            csv_out.writerow({'issue_date': format_date(missing)})
        results.write('</table>\n')
        results.write('</body>\n</html>')
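As the docstring notes, passing year forces a check of every day in that year, and exclude skips the listed weekdays. A hypothetical invocation (the file name is illustrative):

# Hypothetical call: audit an already-harvested CSV for 1913,
# skipping Sundays (datetime.date.weekday() == 6).
check_csv('editorials-35.csv', year=1913, exclude=[6])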
Example #4
def test_parse_date(self):
    cases = [
        ('2 June 1884', {'date': datetime.datetime(1884, 6, 2), 'day': True, 'month': True}),
        ('03 Jul 1921', {'date': datetime.datetime(1921, 7, 3), 'day': True, 'month': True}),
        ('13 Jul. 1921', {'date': datetime.datetime(1921, 7, 13), 'day': True, 'month': True}),
        ('Dec 1778', {'date': datetime.datetime(1778, 12, 1), 'day': False, 'month': True}),
        ('1962', {'date': datetime.datetime(1962, 1, 1), 'day': False, 'month': False}),
    ]
    for case in cases:
        self.assertEqual(utilities.parse_date(case[0]), case[1])
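The assertions above pin down the contract of utilities.parse_date: it returns a dict holding the parsed datetime plus flags recording whether a day and a month were actually supplied. A minimal standard-library sketch that satisfies these cases (not the project's actual implementation):

import datetime

def parse_date(date_string):
    # Sketch only: try progressively vaguer formats and record which
    # components the input actually contained.
    cleaned = date_string.strip().replace('.', '')
    formats = [
        ('%d %B %Y', True, True),   # 2 June 1884
        ('%d %b %Y', True, True),   # 03 Jul 1921
        ('%B %Y', False, True),     # December 1778
        ('%b %Y', False, True),     # Dec 1778
        ('%Y', False, False),       # 1962
    ]
    for fmt, has_day, has_month in formats:
        try:
            parsed = datetime.datetime.strptime(cleaned, fmt)
        except ValueError:
            continue
        return {'date': parsed, 'day': has_day, 'month': has_month}
    return None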
Example #5
def check_csv(file_name, year=None, exclude=[]):
    '''
    Check for missing editorials
    year: check every day in the specified year
    exclude: list of days to exclude from checking (eg. to exclude Sunday [6])
    '''
    articles = csv.reader(open(file_name, 'rb'), delimiter=',', quotechar='"')
    article_dates = []
    missing_dates = []
    for article in articles:
        # Get the date
        date = parse_date(article[6])
        article_dates.append(date)
    article_dates.sort()
    duplicates = find_duplicates(article_dates)
    print 'Duplicates: %s' % len(duplicates)
    if year:
        start_date = datetime.date(year, 1, 1)
        end_date = datetime.date(year, 12, 31)
    else:
        start_date = article_dates[0]
        end_date = article_dates[-1]
    one_day = datetime.timedelta(days=1)
    this_day = start_date
    # Loop through each day in specified period to see if there's an article
    # If not, add to the missing_dates list.
    while this_day <= end_date:
        if this_day.weekday() not in exclude: #exclude Sunday
            if this_day not in article_dates:
                missing_dates.append(this_day)
        this_day += one_day
    print 'Missing: %s' % len(missing_dates)
    csv_out = csv.DictWriter(open(file_name, 'ab'),
                             extrasaction='ignore',
                             fieldnames=['id', 'title', 'url', 'newspaper_title',
                                         'newspaper_details', 'newspaper_id',
                                         'issue_date', 'page', 'page_url',
                                         'corrections', 'ftext'],
                             dialect=csv.excel)
    # Write a results file with nicely formatted dates
    with open(os.path.join(os.path.dirname(file_name), 'csv_check.html'), 'wb') as results:
        results.write('<html>\n<head>\n  <title>Results</title>\n</head>\n<body>')
        results.write('<h2>Duplicates:</h2>\n<table>\n')
        for dup in duplicates:
            results.write('<tr><td>%s</td><td><a href="%s">View issue</a></td></tr>\n' % (format_date(dup), get_issue_url(dup, '35')))
        results.write('</table>\n')
        results.write('<h2>Missing:</h2>\n<table>\n')
        for missing in missing_dates:
            results.write('<tr><td>%s</td><td><a href="%s">View issue</a></td></tr>\n' % (format_date(missing), get_issue_url(missing, '35')))
            csv_out.writerow({'issue_date': format_date(missing)})
        results.write('</table>\n')
        results.write('</body>\n</html>')
Example #6
def __init__(self, stock_date, price):
    self.stock_date = parse_date(stock_date)
    self.price = float(price)
Example #7
def check_editorial(csv_file, year):
    print csv_file
    articles = csv.reader(open(csv_file, 'rb'), delimiter=',', quotechar='"')
    # Things to check:
    #   Duplicate dates
    #   Number of words
    #   Page Number
    #   Missing -- article for each issue
    title_id = None
    title_name = None
    article_count = 0
    article_dates = []
    articles_filtered = []
    odd_pages = []
    short_articles = []
    long_articles = []
    missing_dates = []
    for article in articles:
        article_count += 1
        if not title_id: title_id = article[5]
        if not title_name: title_name = article[3]
        # Get the date
        try:
            page_number = int(article[7])
        except ValueError:
            page_number = int(re.search(r'(\d+)', article[7]).group(1))
        article_url = article[2]
        article_title = article[1]
        article_date = parse_date(article[6])
        text = article[10]
        word_count = len(text.split())
        # Check if page number is odd or even
        if page_number % 2: 
            odd_pages.append({'url': article_url,
                              'title': article_title,
                              'date': article_date.isoformat(),
                              'page': page_number,
                              'length': word_count})
        else:
            if word_count < 100:
                short_articles.append({'url': article_url,
                                       'title': article_title,
                                       'date': article_date.isoformat(),
                                       'page': page_number,
                                       'length': word_count})
            elif word_count > 1500:
                long_articles.append({'url': article_url,
                                      'title': article_title,
                                      'date': article_date.isoformat(),
                                      'page': page_number,
                                      'length': word_count})
            else:
                article_dates.append(article_date.isoformat())
                articles_filtered.append({'url': article_url,
                                          'title': article_title,
                                          'date': article_date.isoformat(),
                                          'page': page_number,
                                          'length': word_count})
    # Return duplicates
    duplicate_dates = sorted(set(find_duplicates(article_dates)))
    # Remove duplicates
    article_dates = sorted(set(article_dates))
    issues = get_title_issues(title_id, year)
    issue_dates = [issue['date'] for issue in issues]
    for issue_date in issue_dates:
        if issue_date not in article_dates: missing_dates.append(issue_date)
    print 'Total articles: %s' % article_count
    print 'Odd pages: %s' % len(odd_pages)
    print 'Short articles: %s' % len(short_articles)
    print 'Long articles: %s' % len(long_articles)
    print 'Duplicate dates: %s' % len(duplicate_dates)
    print 'Found articles: %s' % len(articles_filtered)
    print 'Missing articles: %s' % len(missing_dates)
    results = {'id': title_id,
               'name': title_name,
               'odd': odd_pages,
               'short': short_articles,
               'long': long_articles,
               'articles': articles_filtered,
               'duplicates': duplicate_dates,
               'missing': missing_dates,
               'issues': len(issues)
               }
    json_file = '%s/json/%s.js' % (NMA_FOLDER, os.path.basename(csv_file)[:-4])
    print json_file
    with open(json_file, 'wb') as json_data:
        json.dump(results, json_data, indent=2)
    return results
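check_editorial prints its counts to the console and writes the full summary as JSON under NMA_FOLDER/json. A hypothetical call (file name and year are illustrative):

# Hypothetical call: audit the harvested 1913 editorials and keep the
# returned summary for further processing.
results = check_editorial('editorials-35.csv', 1913)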
Example #8
def check_editorial(csv_file, year):
    print csv_file
    articles = csv.reader(open(csv_file, 'rb'), delimiter=',', quotechar='"')
    # Things to check:
    #   Duplicate dates
    #   Number of words
    #   Page Number
    #   Missing -- article for each issue
    title_id = None
    title_name = None
    article_count = 0
    article_dates = []
    articles_filtered = []
    odd_pages = []
    short_articles = []
    long_articles = []
    missing_dates = []
    for article in articles:
        article_count += 1
        if not title_id: title_id = article[5]
        if not title_name: title_name = article[3]
        # Get the date
        try:
            page_number = int(article[7])
        except ValueError:
            page_number = int(re.search(r'(\d+)', article[7]).group(1))
        article_url = article[2]
        article_title = article[1]
        article_date = parse_date(article[6])
        text = article[10]
        word_count = len(text.split())
        # Check if page number is odd or even
        if page_number % 2:
            odd_pages.append({
                'url': article_url,
                'title': article_title,
                'date': article_date.isoformat(),
                'page': page_number,
                'length': word_count
            })
        else:
            if word_count < 100:
                short_articles.append({
                    'url': article_url,
                    'title': article_title,
                    'date': article_date.isoformat(),
                    'page': page_number,
                    'length': word_count
                })
            elif word_count > 1500:
                long_articles.append({
                    'url': article_url,
                    'title': article_title,
                    'date': article_date.isoformat(),
                    'page': page_number,
                    'length': word_count
                })
            else:
                article_dates.append(article_date.isoformat())
                articles_filtered.append({
                    'url': article_url,
                    'title': article_title,
                    'date': article_date.isoformat(),
                    'page': page_number,
                    'length': word_count
                })
    # Return duplicates
    duplicate_dates = sorted(set(find_duplicates(article_dates)))
    # Remove duplicates
    article_dates = sorted(set(article_dates))
    issues = get_title_issues(title_id, year)
    issue_dates = [issue['date'] for issue in issues]
    for issue_date in issue_dates:
        if issue_date not in article_dates: missing_dates.append(issue_date)
    print 'Total articles: %s' % article_count
    print 'Odd pages: %s' % len(odd_pages)
    print 'Short articles: %s' % len(short_articles)
    print 'Long articles: %s' % len(long_articles)
    print 'Duplicate dates: %s' % len(duplicate_dates)
    print 'Found articles: %s' % len(articles_filtered)
    print 'Missing articles: %s' % len(missing_dates)
    results = {
        'id': title_id,
        'name': title_name,
        'odd': odd_pages,
        'short': short_articles,
        'long': long_articles,
        'articles': articles_filtered,
        'duplicates': duplicate_dates,
        'missing': missing_dates,
        'issues': len(issues)
    }
    json_file = '%s/json/%s.js' % (NMA_FOLDER, os.path.basename(csv_file)[:-4])
    print json_file
    with open(json_file, 'wb') as json_data:
        json.dump(results, json_data, indent=2)
    return results