class Client:
    """Worker that pulls download requests from a server and sends back an archive.

    Each cycle of run() connects to the server, sends this machine's
    hostname, reads a batch of requests, stores the fetched data as files in
    the staging directory, sends the staging directory back as a gzipped tar
    archive and then deletes the local files.

    The class relies on module-level names that are not defined here:
    ``tmpdir``, ``debug``, ``creds``, ``Reddit`` and ``parser``.
    """

    # Socket of the current server connection; None while disconnected.
    sock = None
    # Reddit(...) instance, created at the start of run().
    reddit = None

    def _staging_dir(self):
        """Return the directory that downloaded files are written to."""
        return os.path.join(tmpdir, 'client', 'staging')

    def _write_blob(self, filename, blob):
        """Serialise blob as JSON into filename inside the staging directory."""
        # 'with' closes the file even when json.dumps or write raises.
        with open(os.path.join(self._staging_dir(), filename), 'w') as out:
            out.write(json.dumps(blob))

    def _read_requests(self, fp):
        """Read comma-separated request lines from fp and return them as lists.

        Reading stops at the second empty line seen in total. readline()
        returns an empty string at end of file, so a closed connection also
        ends the loop.
        """
        reqlist = []
        seen_blank = False
        while True:
            line = fp.readline().strip()
            if line != '':
                reqlist.append(line.split(','))
            elif seen_blank:
                break
            else:
                seen_blank = True
        return reqlist

    def download_a(self, after=None):
        """Fetch one page of the 'all' listing and store it as ``a_<after>``.

        after is handed to reddit.getListing unchanged. When it is None the
        file is named plain ``a_``. Always returns True.
        """
        posts, nav = self.reddit.getListing('all', after)
        # after defaults to None, which cannot be concatenated to a string.
        self._write_blob('a_' + (after or ''), {'posts': posts, 'nav': nav})
        return True

    def download_p(self, pid):
        """Fetch post pid with its comments and store it as ``p_<pid>``.

        Calls reddit.updateToken() before the request. Always returns True.
        """
        self.reddit.updateToken()
        post, comments = self.reddit.getPost(pid)
        self._write_blob('p_' + pid, {'post': post, 'comments': comments})
        return True

    def download_u(self, user):
        """Download every listing page of user into ``u_<user>[_<after>]`` files.

        Follows the 'after' value of each page until it is None. Returns
        False as soon as a page comes back as an empty string, True once the
        last page has been fetched.
        """
        # NOTE(review): download_get is not defined in this class as shown
        # here; confirm it is provided elsewhere (mixin, subclass or patch).
        download_dir = self._staging_dir()
        base_url = 'http://www.reddit.com/user/' + user + '.json'
        page_url = base_url
        filename = 'u_' + user
        while True:
            data = self.download_get(
                page_url, os.path.join(download_dir, filename), True)
            if data == '':
                return False
            nav = parser.extract_listing_nav(json.loads(data))
            after = nav['after']
            if after is None:
                return True
            page_url = base_url + '?after=' + after
            filename = 'u_' + user + '_' + after

    def download_req(self, req):
        """Dispatch one request and return the handler's result.

        req is a list of strings whose first item selects the handler and
        whose second item is its argument:
            ['a', <after>]    -> download_a
            ['p', <pid>]      -> download_p
            ['u', <username>] -> download_u
        Any further items are ignored. An unknown first item is skipped and
        reported as True.
        """
        handlers = {
            'a': self.download_a,
            'p': self.download_p,
            'u': self.download_u,
        }
        handler = handlers.get(req[0])
        if handler is None:
            return True
        return handler(req[1])

    def download_data(self, reqlist):
        """Run every request in reqlist in order, printing each when done."""
        for req in reqlist:
            self.download_req(req)
            print(' -- ' + str(req))

    def connect(self, host, port):
        """Open a TCP connection to host:port and return a file object for it.

        The socket itself is kept in self.sock so close() can release it.
        """
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        sock.connect((host, port))
        self.sock = sock
        fp = sock.makefile('rb+')
        print('Connected to ' + host + ':' + str(port))
        return fp

    def close(self):
        """Close the current socket; does nothing when not connected."""
        if self.sock is not None:
            self.sock.close()
            self.sock = None

    def cleanup(self):
        """Delete every staged file and the archive built from them."""
        download_dir = self._staging_dir()
        for entry in os.listdir(download_dir):
            os.unlink(os.path.join(download_dir, entry))
        os.unlink(os.path.join(tmpdir, 'client', 'archive.tar.gz'))

    def targz(self):
        """Pack the staging directory into archive.tar.gz and return its path."""
        return shutil.make_archive(
            os.path.join(tmpdir, 'client', 'archive'),
            'gztar',
            self._staging_dir())

    def run(self, host, port):
        """Serve the server at host:port forever.

        Sets up the Reddit session once, then loops: connect, send the
        hostname, read a batch of requests, download them, send back the
        archive, disconnect and clean up. An empty batch only closes the
        connection. After more than one consecutive empty batch the loop
        waits 1 second before reconnecting, after more than ten it waits
        10 seconds.
        """
        self.reddit = Reddit(creds.key, creds.secret, creds.username,
                             creds.password, creds.redirect_uri)
        self.reddit.updateToken()
        self.reddit.testAccess()

        sleeptime = 0
        while True:
            if sleeptime > 10:
                time.sleep(10)
            elif sleeptime > 1:
                time.sleep(1)

            # Connect to host:port, get the fp
            fp = self.connect(host, port)

            # Send hostname of client over initially
            hostname = socket.getfqdn()
            fp.write(hostname + '\n')
            fp.flush()
            if debug:
                print('Sent hostname')

            # Recv all the requests
            reqlist = self._read_requests(fp)
            fp.flush()
            print(host + ' >> ' + str(reqlist))

            # See if any requests were sent, close if zero
            if len(reqlist) == 0:
                if debug:
                    print('No requests')
                # The makefile() object keeps the connection referenced, so
                # it has to be closed together with the socket.
                fp.close()
                self.close()
                sleeptime += 1
                continue
            sleeptime = 0

            if debug:
                print('Downloading requests')

            # Download all the requests otherwise
            self.download_data(reqlist)

            # targzip the data
            archive_path = self.targz()

            # Send the data in chunks instead of loading the whole archive.
            with open(archive_path, 'rb') as archive_fp:
                shutil.copyfileobj(archive_fp, fp)
            fp.flush()
            print(host + ' << archive.tar.gz')

            fp.close()
            self.close()
            self.cleanup()
class Client:
    """Download worker driven by a remote server over a TCP connection.

    run() repeatedly connects, identifies itself by hostname, receives a
    batch of requests, writes the results into the staging directory, ships
    that directory back as a gzipped tar archive and removes the local files.

    Uses the module-level names ``tmpdir``, ``debug``, ``creds``, ``Reddit``
    and ``parser``, none of which are defined inside this class.
    """

    # Socket of the active server connection, or None when disconnected.
    sock = None
    # Reddit(...) instance; assigned when run() starts.
    reddit = None

    def _staging_dir(self):
        """Return the path of the directory holding downloaded files."""
        return os.path.join(tmpdir, 'client', 'staging')

    def _write_blob(self, filename, blob):
        """Write blob as JSON to filename in the staging directory."""
        target = os.path.join(self._staging_dir(), filename)
        # The context manager closes the file even if serialisation fails.
        with open(target, 'w') as handle:
            handle.write(json.dumps(blob))

    def _read_requests(self, fp):
        """Collect request lines from fp, each split on commas.

        The batch is finished by the second empty line read overall. End of
        file makes readline() return an empty string, so a dropped
        connection terminates the loop too.
        """
        requests = []
        blank_seen = False
        while True:
            line = fp.readline().strip()
            if line != '':
                requests.append(line.split(','))
                continue
            if blank_seen:
                return requests
            blank_seen = True

    def download_a(self, after=None):
        """Store one page of the 'all' listing in the file ``a_<after>``.

        after is passed straight to reddit.getListing. A value of None
        yields the file name ``a_``. Always returns True.
        """
        posts, nav = self.reddit.getListing('all', after)
        # The default after=None would make 'a_' + after raise TypeError.
        suffix = after or ''
        self._write_blob('a_' + suffix, {'posts': posts, 'nav': nav})
        return True

    def download_p(self, pid):
        """Store post pid and its comments in the file ``p_<pid>``.

        The token is refreshed via reddit.updateToken() first. Always
        returns True.
        """
        self.reddit.updateToken()
        post, comments = self.reddit.getPost(pid)
        self._write_blob('p_' + pid, {'post': post, 'comments': comments})
        return True

    def download_u(self, user):
        """Fetch all listing pages of user into ``u_<user>[_<after>]`` files.

        Pages are followed through their 'after' value until it is None.
        Returns False if any page is returned as an empty string, otherwise
        True after the final page.
        """
        # NOTE(review): self.download_get does not appear in this class;
        # verify that it is supplied from somewhere else before relying on it.
        staging = self._staging_dir()
        url = 'http://www.reddit.com/user/' + user + '.json'

        data = self.download_get(url, os.path.join(staging, 'u_' + user), True)
        if data == '':
            return False
        nav = parser.extract_listing_nav(json.loads(data))

        while nav['after'] is not None:
            cursor = nav['after']
            data = self.download_get(
                url + '?after=' + cursor,
                os.path.join(staging, 'u_' + user + '_' + cursor),
                True)
            if data == '':
                return False
            nav = parser.extract_listing_nav(json.loads(data))
        return True

    def download_req(self, req):
        """Run the download selected by req and return its result.

        req is a list of strings; item 0 picks the handler, item 1 is the
        argument passed to it, later items are ignored:
            ['a', <after>]    -> download_a
            ['p', <pid>]      -> download_p
            ['u', <username>] -> download_u
        A request with any other first item is ignored and yields True.
        """
        kind = req[0]
        if kind == 'a':
            return self.download_a(req[1])
        if kind == 'p':
            return self.download_p(req[1])
        if kind == 'u':
            return self.download_u(req[1])
        return True

    def download_data(self, reqlist):
        """Process the requests in reqlist one by one, printing each after it."""
        for req in reqlist:
            self.download_req(req)
            print(' -- ' + str(req))

    def connect(self, host, port):
        """Connect to host:port over TCP and return a file object for the socket.

        The socket is remembered in self.sock for a later close().
        """
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        sock.connect((host, port))
        self.sock = sock
        fp = sock.makefile('rb+')
        print('Connected to ' + host + ':' + str(port))
        return fp

    def close(self):
        """Close the socket if one is open; safe to call when disconnected."""
        if self.sock is None:
            return
        self.sock.close()
        self.sock = None

    def cleanup(self):
        """Remove all files in the staging directory, then the archive."""
        staging = self._staging_dir()
        for entry in os.listdir(staging):
            os.unlink(os.path.join(staging, entry))
        os.unlink(os.path.join(tmpdir, 'client', 'archive.tar.gz'))

    def targz(self):
        """Create archive.tar.gz from the staging directory; return its path."""
        base_name = os.path.join(tmpdir, 'client', 'archive')
        return shutil.make_archive(base_name, 'gztar', self._staging_dir())

    def run(self, host, port):
        """Work for the server at host:port in an endless loop.

        Creates the Reddit session once. Every iteration connects, sends the
        hostname, reads the requests, downloads them, returns the archive,
        disconnects and cleans up. When no requests arrive the connection is
        simply closed. Reconnecting is delayed by 1 second after more than
        one consecutive empty batch and by 10 seconds after more than ten.
        """
        self.reddit = Reddit(creds.key, creds.secret, creds.username,
                             creds.password, creds.redirect_uri)
        self.reddit.updateToken()
        self.reddit.testAccess()

        sleeptime = 0
        while True:
            if sleeptime > 10:
                time.sleep(10)
            elif sleeptime > 1:
                time.sleep(1)

            # Connect to host:port, get the fp
            fp = self.connect(host, port)

            # Send hostname of client over initially
            fp.write(socket.getfqdn() + '\n')
            fp.flush()
            if debug:
                print('Sent hostname')

            # Recv all the requests
            reqlist = self._read_requests(fp)
            fp.flush()
            print(host + ' >> ' + str(reqlist))

            # See if any requests were sent, close if zero
            if not reqlist:
                if debug:
                    print('No requests')
                # Closing only the socket leaves the makefile() object
                # holding the connection, so close that object as well.
                fp.close()
                self.close()
                sleeptime += 1
                continue
            sleeptime = 0

            if debug:
                print('Downloading requests')

            # Download all the requests otherwise
            self.download_data(reqlist)

            # targzip the data
            archive = self.targz()

            # Stream the archive so it never has to fit in memory at once.
            with open(archive, 'rb') as src:
                shutil.copyfileobj(src, fp)
            fp.flush()
            print(host + ' << archive.tar.gz')

            fp.close()
            self.close()
            self.cleanup()