"""Dump the ``link`` field of every entry in the S3 link store to a local log.

Credentials are read from the ``S3_ACCESS_KEY`` and ``S3_SECRET_ACCESS_KEY``
environment variables. The JSON document named by ``init_store`` is fetched
through ``spiders.util`` and one link per line is written to ``local_store``.
"""
import os
import tinys3
import spiders.util as util
import json
import logging

logging.basicConfig(format="%(levelname)s:%(message)s", level=logging.DEBUG)

key = os.environ.get("S3_ACCESS_KEY")
secret_key = os.environ.get("S3_SECRET_ACCESS_KEY")
endpoint = "s3-us-west-2.amazonaws.com"
bucket = "w205twitterproject"
init_store = "links2.json"
local_store = "logs/temp_urls.log"

# Fetch the remote data before touching the local file, so a failed S3 call
# does not leave behind a truncated (emptied) log.
conn = util.s3_connect(key, secret_key, endpoint, default_bucket=bucket)
response = util.get_json(conn, init_store)
links = util.get_links(response)

# The context manager guarantees the handle is closed even if a write fails.
with open(local_store, "w") as f:
    for link in links:
        try:
            tmp_link = str(link["link"])
        except (KeyError, TypeError, ValueError) as err:
            # Best effort: a malformed entry (missing "link" key, non-mapping
            # entry, un-encodable text) is skipped, but no longer silently.
            logging.warning("Skipping malformed link entry %r: %s", link, err)
            continue
        f.write(tmp_link + "\n")
        logging.debug("Writing to store %s , link %s", local_store, tmp_link)
import os  # os.environ is read below for the S3 credentials
import spiders.util as util
import json
import logging

# Copies the "link" value of every entry in the S3 link store (init_store)
# into a newline-delimited local file (local_store).

logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.DEBUG)

key = os.environ.get("S3_ACCESS_KEY")
secret_key = os.environ.get("S3_SECRET_ACCESS_KEY")
endpoint = "s3-us-west-2.amazonaws.com"
bucket = "w205twitterproject"
init_store = "links2.json"
local_store = "logs/temp_urls.log"

# Talk to S3 first: if the download fails, the existing local file is left
# untouched instead of being truncated by open(..., 'w').
conn = util.s3_connect(key, secret_key, endpoint, default_bucket=bucket)
response = util.get_json(conn, init_store)
links = util.get_links(response)

# `with` closes the file on every exit path, including write errors.
with open(local_store, 'w') as f:
    for link in links:
        try:
            tmp_link = str(link['link'])
        except (KeyError, TypeError, ValueError) as err:
            # Keep the best-effort behaviour (skip bad entries) but report
            # them instead of swallowing every exception.
            logging.warning("Skipping malformed link entry %r: %s", link, err)
            continue
        f.write(tmp_link + '\n')
        logging.debug("Writing to store %s , link %s", local_store, tmp_link)
secret_key = os.environ.get("S3_SECRET_ACCESS_KEY")
endpoint = "s3-us-west-2.amazonaws.com"
bucket = "w205twitterproject"
spam_url_location = "logs/spammy_urls.log"
S3_url_location = "links2.json"
# One name for the local JSON snapshot so the write and the later re-open
# cannot drift apart (it was written under logs/ but re-opened without it).
local_json_location = "logs/test_links.json"

# Creating one {"link": <url>} JSON object per line of the spam-URL log.
# Text mode rather than 'rb': the lines are stripped with a str argument and
# serialised to JSON, both of which need str (bytes raise on Python 3).
# The with-block closes the handle, which was previously leaked.
with open(spam_url_location, "r") as spam_file:
    links = [{"link": line.rstrip("\n")} for line in spam_file]

# Connecting to S3
conn = util.s3_connect(key, secret_key, endpoint, default_bucket=bucket)
response = util.get_json(conn, S3_url_location)
# %d, not %3f: the value is an integer count.
logging.debug("original length of spammy urls list: %d",
              len(util.get_links(response)))

# Appending new links to the JSON object fetched from S3
# NOTE(review): relies on util.append_links updating `response` in place,
# since its return value is not used -- confirm against spiders.util.
util.append_links(response, links)
logging.debug("after appending length of spammy urls list: %d",
              len(util.get_links(response)))
upload = json.dumps(response)

# Writing the extended JSON object to a local file
with open(local_json_location, "w") as outfile:
    json.dump(response, outfile)

# Loading extended JSON object into S3 bucket
# Intentionally left open: presumably consumed by the upload step that
# follows -- TODO confirm it is closed once the upload has finished.
f = open(local_json_location, "r")