Ejemplo n.º 1
0
import os
import tinys3
import spiders.util as util
import json
import logging

# Script: download the JSON link store from S3 and write one link per line
# to a local log file.
logging.basicConfig(format="%(levelname)s:%(message)s", level=logging.DEBUG)

# S3 credentials come from the environment; os.environ.get yields None
# for either value when the variable is unset.
key = os.environ.get("S3_ACCESS_KEY")
secret_key = os.environ.get("S3_SECRET_ACCESS_KEY")
endpoint = "s3-us-west-2.amazonaws.com"
bucket = "w205twitterproject"
init_store = "links2.json"
local_store = "logs/temp_urls.log"

# Fetch the remote data before opening the local file: a failed connection
# or download then neither truncates the existing log nor leaks an open
# file handle.
conn = util.s3_connect(key, secret_key, endpoint, default_bucket=bucket)
response = util.get_json(conn, init_store)

# NOTE(review): assumes get_links returns an iterable of mappings that
# carry a "link" key -- confirm against spiders.util.
links = util.get_links(response)

# The context manager closes the file even if a write raises.
with open(local_store, "w") as f:
    for link in links:
        # Best effort per entry: skip (and report) entries that lack a
        # usable "link" value, but let real I/O errors propagate.
        try:
            tmp_link = str(link["link"])
        except (KeyError, TypeError, ValueError) as err:
            logging.warning("Skipping malformed link entry %r: %s", link, err)
            continue
        f.write(tmp_link + "\n")
        logging.debug("Writing to store %s , link %s", local_store, tmp_link)
Ejemplo n.º 2
0
import spiders.util as util
import json
import logging

# Script: download the JSON link store from S3 and write one link per line
# to a local log file.
logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.DEBUG)

# S3 credentials come from the environment; os.environ.get yields None
# for either value when the variable is unset.
key = os.environ.get("S3_ACCESS_KEY")
secret_key = os.environ.get("S3_SECRET_ACCESS_KEY")
endpoint = "s3-us-west-2.amazonaws.com"
bucket = "w205twitterproject"
init_store = "links2.json"
local_store = "logs/temp_urls.log"

# Fetch the remote data before opening the local file: a failed connection
# or download then neither truncates the existing log nor leaks an open
# file handle.
conn = util.s3_connect(key, secret_key, endpoint, default_bucket=bucket)
response = util.get_json(conn, init_store)

# NOTE(review): assumes get_links returns an iterable of mappings that
# carry a "link" key -- confirm against spiders.util.
links = util.get_links(response)

# The context manager closes the file even if a write raises.
with open(local_store, 'w') as f:
    for link in links:
        # Best effort per entry: skip (and report) entries that lack a
        # usable "link" value, but let real I/O errors propagate.
        try:
            tmp_link = str(link['link'])
        except (KeyError, TypeError, ValueError) as err:
            logging.warning("Skipping malformed link entry %r: %s", link, err)
            continue
        f.write(tmp_link + '\n')
        logging.debug("Writing to store %s , link %s", local_store, tmp_link)


Ejemplo n.º 3
0
# Script: read locally collected spam URLs, append them to the JSON link
# store fetched from S3, and write the merged result to a local JSON file.
# NOTE(review): key, secret_key and endpoint are not defined in this
# snippet; they are presumably set earlier in the file -- confirm.
bucket = "w205twitterproject"
spam_url_location = "logs/spammy_urls.log"
S3_url_location = "links2.json"
# Single source of truth for the merged output path, so the file that is
# written is the same one a later upload would read.
merged_links_path = "logs/test_links.json"

# Creating JSON objects from extracted URLs. The file is read in text mode
# so each line is a str: stripping "\n" works on Python 3 and the values
# are JSON-serialisable.
with open(spam_url_location, "r") as f:
    links = [{"link": line.rstrip("\n")} for line in f]

# Connecting to S3
conn = util.s3_connect(key, secret_key, endpoint, default_bucket=bucket)
response = util.get_json(conn, S3_url_location)

# Counts are integers, so format them with %d rather than as floats.
logging.debug("original length of spammy urls list: %d", len(util.get_links(response)))

# Appending new links to the JSON object fetched from S3.
# NOTE(review): the return value is ignored, so this presumably mutates
# response in place -- confirm against spiders.util.
util.append_links(response, links)

logging.debug("after appending length of spammy urls list: %d", len(util.get_links(response)))

# Serialised form of the extended object (kept for the disabled upload step).
upload = json.dumps(response)

# Writing the extended JSON object to a local file.
with open(merged_links_path, "w") as outfile:
    json.dump(response, outfile)

# Uploading the extended JSON object to the S3 bucket is currently disabled.
# When re-enabling it, stream the file that was just written:
# with open(merged_links_path, "r") as f:
#     util.upload_json(conn, "logs/test.json", f)