예제 #1
0
 def test_normal(self):
     """Fetch http://example.com/ and verify the parsed page title."""
     url = 'http://example.com/'
     doc = download(url)
     title = pq(doc).find('title').text()
     # Parenthesised form is valid on both Python 2 and Python 3.
     print(title)
     # assertTrue(a, b) only checks that `a` is truthy and uses `b` as the
     # failure message, so it could never fail here; compare the values.
     self.assertEqual(title, 'Example Domain')
예제 #2
0
quiet = args.quiet

# Twitter OAuth: on first run, prompt for the consumer credentials and cache
# them in a JSON file; on later runs, load the cached credentials instead.
oauthFile = '.oauth.json'
if not os.path.exists(oauthFile):
    # Dict literal values are evaluated in order: token prompt, then secret.
    auth = {
        'consumer_token': input('Token: '),
        'consumer_secret': input('Secret: '),
    }

    with open(oauthFile, 'w') as fh:
        json.dump(auth, fh, indent=4, default=str)
else:
    # Use a context manager so the file handle is closed deterministically
    # (the previous bare open().read() left the handle to the GC).
    with open(oauthFile) as fh:
        auth = json.load(fh)

# Create output and tmp directory if necessary. exist_ok avoids the
# check-then-create race when the directory appears in between.
os.makedirs('tmp', exist_ok=True)
os.makedirs(outputDir, exist_ok=True)

urlsFile = 'tmp/' + userId + '_urls.json'
downloadsFile = 'tmp/' + userId + '_download.json'

if quiet:
    # Discard everything printed from here on; the devnull handle is
    # intentionally left open because it now backs sys.stdout.
    sys.stdout = open(os.devnull, 'w')

# Pipeline: collect media URLs -> map them to download entries -> download.
getMedias(auth, userId, includeRetweets, imageSize, urlsFile)
generateResults(urlsFile, downloadsFile, filenameFormat)
download(downloadsFile, outputDir, False, False)
예제 #3
0
 def test_404(self):
     """download() is expected to return (None, 404) for a missing page."""
     missing_url = 'http://www.cs.helsinki.fi/u/hxiao/rl-seminar/paper.pd'
     body, status = download(missing_url)
     self.assertEqual(body, None)
     self.assertEqual(status, 404)
예제 #4
0
 def test_nonhtml(self):
     """download() is expected to return (None, 'application/pdf') for a PDF URL."""
     pdf_url = 'http://www.cs.helsinki.fi/u/hxiao/rl-seminar/paper.pdf'
     body, reason = download(pdf_url)
     self.assertEqual(body, None)
     self.assertEqual(reason, 'application/pdf')
예제 #5
0
from src.mapper import generate_results
from src.downloader import download

if __name__ == '__main__':
    # Read the command line and resolve the user ids to process
    ARGS = parse_args(sys.argv[1:])
    USER_IDS = parse_file_arg(ARGS.userid)

    # Twitter OAuth credentials
    AUTH = get_oauth('.oauth.json')

    # With the "quiet" flag, send everything printed from here on to devnull
    if ARGS.quiet:
        sys.stdout = open(os.devnull, 'w')

    # Process the users one at a time
    for user_id in USER_IDS:

        # When o_userid is set, each user gets a sub-directory named after
        # their id; otherwise everything lands directly in ARGS.output.
        if ARGS.o_userid:
            subdir = user_id + os.sep
        else:
            subdir = ''
        output_dir = os.path.join(ARGS.output, subdir)

        # Make sure the destination exists before downloading into it
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)

        # Fetch the media list, map it to download entries, then download
        medias = get_medias(
            AUTH, user_id, ARGS.retweets, ARGS.image_size,
            ARGS.since, ARGS.since_id, ARGS.until, ARGS.until_id,
            ARGS.likes,
        )
        results = generate_results(medias, ARGS.format)
        download(results, output_dir, False, True)
예제 #6
0
 def test_normal(self):
     """Fetch http://example.com/ and verify the parsed page title."""
     url = 'http://example.com/'
     doc = download(url)
     title = pq(doc).find('title').text()
     # Parenthesised form is valid on both Python 2 and Python 3.
     print(title)
     # assertTrue(a, b) only checks that `a` is truthy and uses `b` as the
     # failure message, so it could never fail here; compare the values.
     self.assertEqual(title, 'Example Domain')
예제 #7
0
 def test_nonhtml(self):
     """A PDF URL should yield no document and the 'application/pdf' marker."""
     target = 'http://www.cs.helsinki.fi/u/hxiao/rl-seminar/paper.pdf'
     document, message = download(target)
     self.assertEqual(document, None)
     self.assertEqual(message, 'application/pdf')
예제 #8
0
 def test_404(self):
     """A missing page should yield no document and the 404 code."""
     target = 'http://www.cs.helsinki.fi/u/hxiao/rl-seminar/paper.pd'
     document, code = download(target)
     self.assertEqual(document, None)
     self.assertEqual(code, 404)
예제 #9
0
파일: run.py 프로젝트: tomg404/Zeit-Scraper
    try:
        argparser.parse()

        # creates new csv file if it doesn't already exist
        if not os.path.isfile(csv_file):
            writer.create_new(csv_file)

        if not os.path.exists(xml_dir):
            os.makedirs(xml_dir)

        if not os.path.exists(save_xml_dir):
            os.makedirs(save_xml_dir)

        print('---------%s---------' % strftime("%Y-%m-%d %H:%M:%S", gmtime()))
        start_time = time()
        downloader.download()
        scraper.main()
        cleaner.clean()
        print('Execution took %s seconds' % (round(time() - start_time, 3)))
        print('-------------------------------------')

    except Exception as e:
        error_msg = 'Something went horribly wrong. Please check the program.\nError: %s' % e
        print(error_msg)
        traceback.print_exc()
        # if USE_NOTIFIER is True, send a message to the given endpoint
        if USE_NOTIFIER:
            from pushnotifier import PushNotifier as pn
            from threading import Thread

            def send_notification():