class HoggyBot(irc.IRCClient):
    """A logging IRC bot."""
    nickname = config.get('irc', 'nick')
    try:
        password = config.get('irc', 'password')
    except Exception:
        password = None
    lineRate = 1

    def __init__(self, *args, **kwargs):
        self.commander = actions.Commander(self)
        self.grabber = Grabber()

    # callbacks for events
    def connectionMade(self):
        irc.IRCClient.connectionMade(self)

    def connectionLost(self, reason):
        irc.IRCClient.connectionLost(self, reason)

    def signedOn(self):
        """Called when the bot has successfully signed on to the server."""
        for channel in self.factory.channels:
            self.join(channel)

    def joined(self, channel):
        """This will get called when the bot joins the channel."""
        self.msg(channel, "I have arrived!")
        self.reddit_update = redditupdate.RedditUpdateThread(self, channel)
        self.reddit_update.parse_threads(self.reddit_update.request_threads(), False)
        self.reddit_update.start()

    def privmsg(self, user, channel, msg):
        """This will get called when the bot receives a message."""
        user = user.split('!', 1)[0]
        # Check to see if they're sending me a private message
        if channel == self.nickname:
            message = self.commander.recv(msg, user)
            self.msg(user, message)
            return
        message = self.commander.recv(msg, user)
        self.grabber.stack(user, msg)
        if message:
            if message[:3] == "/me":
                message = message[4:]
                self.describe(channel, message)
            else:
                self.msg(channel, message)

    # For fun, override the method that determines how a nickname is changed on
    # collisions. The default method appends an underscore.
    def alterCollidedNick(self, nickname):
        """
        Generate an altered version of a nickname that caused a collision in an
        effort to create an unused related name for subsequent registration.
        """
        return nickname + '^'
def get(self, api):
    memory_file = StringIO()
    g = Grabber(api)
    g.grab(memory_file)
    self.response.headers['Content-Type'] = 'application/zip'
    self.response.headers[
        'Content-Disposition'] = 'attachment;filename={}.zip'.format(api)
    self.response.write(memory_file.getvalue())
    memory_file.close()
class Test_Parsing(unittest.TestCase):

    def setUp(self):
        self.grabber = Grabber(BR_THREAD)
        self.grabber.page_html = self.grabber.soup(TEST_HTML)

    def test_post_finding(self):
        self.grabber.find_posts()
        first_post = self.grabber.posts[0]
        self.assertEqual('CremersAlex', first_post.author)
        self.assertEqual('Directors And Other Artists On Blade Runner',
                         first_post.title)
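# The Grabber test snippets in this collection (here and further down) lean on
# module-level fixtures such as BR_THREAD, BR_THREAD_MOBILE, BR_THREAD_FLAT,
# TEST_HTML, VALID_POST, and DELETED_POST that are defined elsewhere. A purely
# illustrative sketch of what two of them might look like (placeholder values,
# not the project's real fixtures):
BR_THREAD = 'https://example.com/forum/thread/12345'  # hypothetical thread URL
TEST_HTML = open('fixtures/blade_runner_thread.html').read()  # hypothetical saved page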
def main():
    ip = settings.IP_ADDRESS
    scanner = Scanner(ip)
    scanner.scan(settings.START_PORT, settings.END_PORT)
    for port in scanner.open_ports:
        try:
            grabber = Grabber(ip, port)
            print(f'{port}: {grabber.read()}')
            grabber.close()
        except Exception as e:
            print('Error', e)
def main():
    ip = '192.168.42.42'
    portrange = (1, 65535)
    scanner = Scanner(ip)
    scanner.scan(*portrange)
    for port in scanner.open_ports:
        try:
            grabber = Grabber(ip, port)
            print('Result is {} on port: {}'.format(grabber.read(), port))
            grabber.close()
        except Exception as e:
            print('Result is Blocking on port: {}'.format(port))
def main():
    ip = '127.0.0.1'
    portrange = (1, 1001)
    scanner = Scanner(ip)
    scanner.scan(*portrange)
    for port in scanner.open_ports:
        try:
            grabber = Grabber(ip, port)
            print(grabber.read())
            grabber.close()
        except Exception as e:  # was `except Exception:`, leaving `e` undefined below
            print("Error", e)
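# The three scanner mains above share the same Scanner/Grabber pattern. A
# minimal sketch of the banner-grabbing Grabber they appear to assume (the
# class body here is an illustration, not any project's actual implementation):
import socket


class Grabber:
    """Connect to an open port and read whatever banner the service sends."""

    def __init__(self, ip, port, timeout=2.0):
        self.sock = socket.create_connection((ip, port), timeout=timeout)

    def read(self, size=1024):
        try:
            return self.sock.recv(size).decode(errors='replace').strip()
        except socket.timeout:
            return ''  # service accepted the connection but sent no banner

    def close(self):
        self.sock.close()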
def __init__(self, sfrag, buffer_len=2):
    self.grb = Grabber(bbox=sfrag)
    # Calculate screen size
    size = (sfrag[2] - sfrag[0], sfrag[3] - sfrag[1])
    self.dimmensions = size + (3,)
    # Set the frame buffers to zeros
    self.buffer_write = np.zeros(self.dimmensions, dtype=np.int8)
    self.buffer_read = np.zeros(self.dimmensions, dtype=np.int8)
    self.space_pressed = False
def __init__(self, onRobot):
    IO = IOTools(onRobot)
    print('Grabber initialised')
    self.camera = IO.camera.initCamera('pi', 'low')
    self.getInputs = IO.interface_kit.getInputs
    self.getSensors = IO.interface_kit.getSensors
    self.mc = IO.motor_control
    self.mc.stopMotors()
    self.sc = IO.servo_control
    self.sc.engage()
    self.grabber = Grabber(self.mc, self.MOTOR_PORT, self.sc)
    # self.grabber.prepare_grabber()
    self.lift = Lift(onRobot, self.mc)
    self.lift_pos = 0
    self.s = None
def collect_base():
    tmp_folder = r'D:\coding\senderbot\grabbed_once'
    for img in Grabber.get_list_of_files(tmp_folder, '.jpg'):
        base_directory = os.path.join(os.path.dirname(__file__), 'base')
        table = compare_to_base(img)
        img_file = os.path.join(tmp_folder, img)
        locate_probable(table, base_directory, img_file)
def test_all_rms():
    # img = Image.open(r'C:\tempme\data\6cfabbc7b2a1f6fc9734b0cc2fffdb30.jpg')
    # c = ImageComparer.resize_to_grayscale(img, size=20)
    # c.save(r'C:\tempme\res.jpg')
    for f1 in Grabber.get_list_of_files(r'C:\tempme\data', '.jpg'):
        # print(f1, '----------')
        lst = []
        for f2 in Grabber.get_list_of_files(r'C:\tempme\grabbed_once', '.jpg'):
            ff1 = Image.open(f1)
            ff2 = Image.open(f2)
            c1 = ImageComparer.resize_to_grayscale(ff1, size=30)
            c2 = ImageComparer.resize_to_grayscale(ff2, size=30)
            icmp = ImageComparer(c1, c2)
            lst.append(icmp.compare())
        k = sorted(lst)
        print(k)
def run_grabber():
    args = get_args()
    check_args(args)
    url = args.url
    json_output = args.json
    html_output = args.html
    output_path = os.path.expanduser(args.output)
    grabber = Grabber(url)
    grabber.run()
    if json_output:
        with open(os.path.join(output_path,
                               '{}.json'.format(grabber.title)), 'w') as output:
            json.dump(grabber.json_output, output, indent=2)
    if html_output:
        raise NotImplementedError("HTML output is not implemented yet.")
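# run_grabber() relies on get_args()/check_args() helpers that are not shown.
# A plausible argparse-based sketch of get_args(), assuming only the flags the
# function above actually reads (--json, --html, --output); illustrative, not
# the project's real parser:
import argparse


def get_args():
    parser = argparse.ArgumentParser(description='Grab a thread and export it.')
    parser.add_argument('url', help='URL of the thread to grab')
    parser.add_argument('--json', action='store_true', help='write JSON output')
    parser.add_argument('--html', action='store_true', help='write HTML output')
    parser.add_argument('--output', default='.', help='output directory')
    return parser.parse_args()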
def process_queue(self):
    map_fromsettings = True
    if not self.cell_list:
        self.cell_list = []
        if self.config.get('map', 'render_from') == 'webdir':
            self.cell_list.extend(self._get_regions_from_apache_dir(
                self.config.get('service', 'render_webdir')))
        elif self.config.get('map', 'render_from') == 'dsm':
            self.cell_list.extend(self._get_regions_from_dsm())
        else:
            print 'select a valid (db/webdir) scavenging method!'
            return
        self._clean_tilepath()
        self.processor.write_helpers()
        map_fromsettings = False
    self.init_map(map_fromsettings)
    grabber = Grabber(self, self.config.getint('service', 'grab_workers'))
    grabber.grab(self.cell_list)
    return {'cells': len(self.cell_list)}
def fb_page_post(page_id):
    app_id = '481658928680227'
    app_secret = '77cde26a4e23cd4742f7dd0fd302029d'
    grabber = Grabber(app_id, app_secret, page_id)
    try:
        funs_by_region = grabber.get_page_fans_by_country()
        posts = grabber.get_last_10_posts()
    except facebook.GraphAPIError:
        return jsonify(**{"status": "fail"})
    fb_page_data = {
        "page_id": page_id,
        "posts": posts,
        "funs_by_region": funs_by_region
    }
    db.fb_page.update({"page_id": page_id}, fb_page_data, upsert=True)
    return jsonify(**{"status": "ok"})
def news_list(db: Session = Depends(utils.get_db),
              limit: int = Query(None, gt=0, description='Number of news items')):
    grabber = Grabber()
    data = grabber.news(limit)
    for event in data:
        news = get_news_by_link(db, event.get('link'))
        if news is None:
            try:
                news = grabber.grub(event.get('link'))
                pub_date = dt.strptime(event.get('published'), '%d.%m.%Y %H:%M')
                news = NewsCreate(**news, pub_date=pub_date)
                create_news(db, news)
            except Exception as e:
                logger.info(
                    f'Unable to process news item: {event}. Error {e}')
    return get_all_news(db, limit)
def main():
    parser = OptionParser(usage="%prog [opts] userid",
                          version="%prog " + VERSION)
    parser.add_option("-u", "--user", action="store", type="int", dest="userid",
                      help="Specify user ID whose posts to search (required or pass as unnamed argument)")
    parser.add_option("-s", "--site", action="store", type="string",
                      default='stackoverflow.com', dest="site",
                      help='Which site to query. (default: stackoverflow.com)')
    parser.add_option("-t", "--type", action="store", type="choice",
                      default="answers", choices=VALID_TYPES, dest="query_type",
                      help="What type of post to query. Options: " + ', '.join(VALID_TYPES) + " (default: answers)")
    parser.add_option("-c", "--chain-length", action="store", type="int",
                      dest="chain_length", default=6,
                      help="Number of words to traverse in a single chain (default: 6)")
    parser.add_option("-n", "--num-chains", action="store", type="int",
                      dest="num_chains", default=25,
                      help="Number of chains to traverse in output (default: 25)")
    parser.add_option("-k", "--api-key", action="store", type="string",
                      dest="api_key", help="API key (default: none)")
    (options, args) = parser.parse_args()
    if len(args) == 1:
        try:
            options.userid = int(args[0])
        except ValueError:
            die("User ID must be an integer.")
    if options.userid is None or len(args) > 1:
        die("No user ID specified.")
    grabber = Grabber(options.site, options.userid)
    minimal_text = grabber.minimal_text(options.query_type)
    if not minimal_text:
        die("No posts found from that user.")
    spam = markovchain(' '.join(minimal_text), options.chain_length,
                       options.num_chains)
    print spam
def get(self):
    try:
        # target = json.loads(self.request.body)["target"]
        target = self.request.GET['target']
        isCourse = 'course' in self.request.GET
        # If target is a number, it's an ID!
        try:
            target = long(target)
        except ValueError:
            pass
        # This is required so that fetch requests don't time out!
        urlfetch.set_default_fetch_deadline(60)
        g = Grabber(target, isCourse)
        self.response.headers[
            'Content-Type'] = 'application/rss+xml; charset=utf-8'
        self.response.write(g.grab_rss_feed())
    except ValueError:
        self.response.status = '400 malformed request body'
    except KeyError:
        self.response.status = '400 no target url specified'
    except InvalidTarget:
        self.response.status = '400 Could not find ID'
def try_to_guess():
    tmp_folder = r'D:\coding\senderbot\try'
    counter = 0
    newc = 0
    for img in Grabber.get_list_of_files(tmp_folder, '.jpg'):
        img_file = os.path.join(tmp_folder, img)
        print 'Analyzing file: %s' % img
        tbl = compare_to_base(img_file)
        tbl2 = copy.deepcopy(tbl)
        tbl2 = normalize(tbl2)
        min_coef = 100
        min_avg = 100
        index = None
        for key in tbl:
            min_tmp = min(tbl[key])
            min_avg_tmp = tbl2[key]
            if min_tmp < min_coef and min_avg_tmp < min_avg:
                index = key
                min_coef = min_tmp
                min_avg = min_avg_tmp
        with open(img_file, 'rb') as f:
            image_file = f.read()
        img_hash = hashlib.md5(image_file).hexdigest()
        nfl = '{0}.jpg'.format(img_hash)  # new file name
        if min_coef < 15 and min_avg < 30:
            log.info('Probably: %s with min: %s and average: %s'
                     % (index, min_coef, min_avg))
            base_directory = os.path.join(os.path.dirname(__file__), 'base')
            result_folder = os.path.join(base_directory, index)
            shutil.move(img_file, os.path.join(result_folder, nfl))
            counter += 1
        else:
            log.info('Maybe new, because: %s and avg: %s' % (min_coef, min_avg))
            folder_for_new_unique = r'D:\coding\senderbot\new_unique'
            shutil.move(img_file, os.path.join(folder_for_new_unique, nfl))
            newc += 1
    log.info('added already known pictures to base: %s' % counter)
    log.info('unique pictures here: %s' % newc)
def test_mobile_url(self):
    grabber = Grabber(BR_THREAD_MOBILE)
    self.assertEqual(BR_THREAD + '/', grabber.url)
import argparse, os.path

parser = argparse.ArgumentParser(
    description='Quickly extract frames from a single video or synchronously from a stereo video.')
parser.add_argument('--video-file', type=str, help='The video file.', required=True)
parser.add_argument('--right-video-file', type=str,
                    help='The optional right video file for stereo setups.')
parser.add_argument('--split', dest='split', action="store_true",
                    help='Assume the video file is side-by-side stereo and split the frames into left and right.')
args = parser.parse_args()

if not os.path.exists(args.video_file):
    parser.print_help()
    import sys
    sys.exit(1)

if args.right_video_file is not None and args.split is True:
    print args.right_video_file
    print args.split
    parser.print_help()
    import sys
    sys.exit(1)

from grabber import Grabber

g = Grabber(args.split)
g.open(args.video_file, args.right_video_file)
g.run()
print("\nDone extracting frames!\n\n")
def test_init(self):
    grabber = Grabber(BR_THREAD)
    self.assertEqual(BR_THREAD, grabber.url)
    self.assertEqual(BR_THREAD_FLAT, grabber.flat_thread)
def __init__(self, *args, **kwargs):
    self.commander = actions.Commander(self)
    self.grabber = Grabber()
class Toddler:
    MOTOR_PORT = 1
    BUMP_SENSOR_SHELF_1 = 0
    BUMP_SENSOR_SHELF_2 = 1
    BUMP_SENSOR_GRABBER_FRONT = 2
    BUMP_SENSOR_GRABBER_BACK = 3

    def __init__(self, onRobot):
        IO = IOTools(onRobot)
        print('Grabber initialised')
        self.camera = IO.camera.initCamera('pi', 'low')
        self.getInputs = IO.interface_kit.getInputs
        self.getSensors = IO.interface_kit.getSensors
        self.mc = IO.motor_control
        self.mc.stopMotors()
        self.sc = IO.servo_control
        self.sc.engage()
        self.grabber = Grabber(self.mc, self.MOTOR_PORT, self.sc)
        # self.grabber.prepare_grabber()
        self.lift = Lift(onRobot, self.mc)
        self.lift_pos = 0
        self.s = None
        # self.mc.setMotor(self.MOTOR_PORT, 100)
        # time.sleep(3)
        # self.mc.stopMotors()

    def kill_socket(self):
        self.s.close()  # was `s.close()`, which referenced an undefined name

    def listen(self):
        global halt
        try:
            PORT = 65432  # Port to listen on (non-privileged ports are > 1023)
            self.s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            # HOST = socket.gethostbyname(socket.gethostname())
            HOST = '192.168.105.139'
            self.s.bind(('192.168.105.139', PORT))
            print("Listening on {}:{}".format(HOST, PORT))
            self.s.listen(1)
            conn, addr = self.s.accept()
            while not halt['stop']:
                data = conn.recv(1024)
                data = data.decode('utf-8')
                data = data.split(' ')
                print("Listen: " + data[0])
                if data[0] == 'grab':
                    self.grabber.grab(self)
                elif data[0] == 'upper_grab':
                    self.grabber.upper_grab(self)
                elif data[0] == 'prepare':
                    self.grabber.prepare_grabber()
                elif data[0] == 'retract':
                    self.grabber.retract_grabber()
                elif data[0] == 'wait_for_bump':
                    inp = self.getInputs()
                    while inp[self.BUMP_SENSOR_SHELF_1] == 0 or \
                            inp[self.BUMP_SENSOR_SHELF_2] == 0:
                        inp = self.getInputs()  # refresh, or the loop never sees new input
                        print(inp)
                    print("bump")
                elif data[0] == 'lift':
                    if int(data[1]) < self.lift_pos:
                        print('down')
                        self.lift.lift('down')
                    elif int(data[1]) > self.lift_pos:
                        print('up')
                        self.lift.lift('up')
                    self.lift_pos = int(data[1])
                    time.sleep(0.5)
                elif data[0] == 'drop':
                    self.grabber.prepare_grabber()
                    inp = self.getInputs()  # was read from a stale/unbound `inp`
                    while inp[self.BUMP_SENSOR_GRABBER_BACK] == 0:
                        inp = self.getInputs()  # refresh inputs while waiting
                        print('Waiting for collection')
                    self.grabber.retract_grabber()
                print("Listen done")
                conn.sendall(b'done')
            conn.close()
        except KeyboardInterrupt:
            conn.close()
            return

    def control(self):
        global halt
        try:
            thread = Thread(target=self.listen)
            thread.daemon = True
            thread.start()
            print("here")
            # start pinging the server
            # server, rasppi, ev3
            rjr = RobotJobListener(('192.168.105.38', 9000),
                                   ('192.168.105.139', 65432),
                                   ('192.168.105.94', 65433))
            rjr.start_reliable_listener('robot')
        except KeyboardInterrupt:
            halt['stop'] = True
            self.sc.disengage()
            return

    def vision(self):
        # image = self.camera.getFrame()
        # self.camera.imshow('Camera', image)
        time.sleep(0.05)
        return
def main():
    grabber = Grabber()
    grabber.go()
def setUp(self):
    self.post = ForumPost(Grabber.soup(VALID_POST).find(class_='comment'))
import logging

from grabber import Grabber
from database import config

logger = logging.getLogger('youtuber')
handler = logging.FileHandler('error.log')
handler.setLevel(logging.ERROR)
formatter = logging.Formatter('%(asctime)s - %(name)s - %(message)s')
handler.setFormatter(formatter)
logger.addHandler(handler)

if __name__ == '__main__':
    grabber = Grabber(config.get('api_key', ''), config.get('channels', []))
    grabber.run()
class Superbowl(Processor):

    def __init__(self, db, token):
        self.g = Grabber(token)
        self.db = db

    """ enter endpoint and getstring (which will be converted to proper
    api url) OR a url"""

    def __getFileObj(self, filename):
        f = open(filename, 'r')
        obj = f.read()
        f.close()
        return json.loads(obj)

    def getProcessed(self):
        all_posts = self.g.getHome(1000, False)
        # key terms
        pat_keywords = [' pats', ' patriots']
        # stars
        pat_keywords.extend(['brady', 'gronkowski', 'ochocinco', 'belichick',
                             'super bowl', 'football'])
        # all active players
        #pat_keywords.extend(['aiken', 'anderson', 'arrington', 'brace', 'brady', 'branch', 'brown', 'cannon', 'chung', 'connolly', 'deaderick', 'edelman', 'ellis', 'faulk', 'fletcher', 'gostkowski', 'green-ellis', 'gronkowski', 'guyton', 'hernandez', 'hoyer', 'ihedigbo', 'jones', 'koutouvides', 'light', 'love', 'mallett', 'mankins', 'mayo', 'mccourty', 'mcdonald', 'mesko', 'molden', 'moore', 'ninkovich', 'ochocinco', 'polite', 'ridley', 'slater', 'solder', 'spikes', 'thomas', 'underwood', 'vereen', 'vollmer', 'warren', 'waters', 'welker', 'wendell', 'white', 'wilfork', 'williams', 'woodhead'])

        # key terms
        giant_keywords = ['giants']
        # stars
        giant_keywords.extend(['manning', 'coughlin', 'superbowl'])
        #giant_keywords.extend(['amukamara', 'baas', 'ballard', 'barden', 'beckum', 'bernard', 'blackburn', 'blackmon', 'boley', 'boothe', 'bradshaw', 'brewer', 'canty', 'carr', 'cordle', 'cruz', 'deossie', 'diehl', 'grant', 'herzlich', 'hynoski', 'jacobs', 'jernigan', 'jones', 'joseph', 'kennedy', 'kiwanuka', 'manning', 'manningham', 'martin', 'mckenzie', 'nicks', 'pascoe', 'paysinger', 'petrus', 'phillips', 'pierre-paul', 'rolle', 'ross', 'sash', 'scott', 'snee', 'thomas', 'tollefson', 'trattou', 'tuck', 'tynes', 'ugoh', 'umenyiora', 'ware', 'weatherford', 'webster', 'williams', 'bing', 'brown', 'capers', 'depalma', 'hendricks', 'hopkins', 'stanback', 'tracy', 'andrews', 'austin', 'beatty', 'clayton', 'coe', 'goff', 'hixon', 'sintim', 'thomas', 'tryon'])

        # sort by likes
        all_posts = sorted(all_posts, key=lambda i: self._fieldCount(i))

        # segregate by types
        pr = ap(all_posts)
        photos = pr.getByKeyValue('type', 'photo')
        posts = pr.getByKeyValue('type', ['status', 'link', 'checkin'])

        # get matching posts
        fields = ['message', 'link', 'name', 'caption', 'description']
        pr_posts = ap(posts)
        giant_posts = pr_posts.searchPosts(fields, giant_keywords, True)
        pat_posts = pr_posts.searchPosts(fields, pat_keywords, True)
        pr_photos = ap(photos)
        giant_photos = pr_photos.searchPosts(fields, giant_keywords)
        pat_photos = pr_photos.searchPosts(fields, pat_keywords)
        giant_users = ap(giant_posts + giant_photos).groupByUid(False)
        pat_users = ap(pat_posts + pat_photos).groupByUid(False)

        # numerical stats
        response = {}
        response['patriots'] = {}
        response['patriots']['statuses'] = pat_posts
        response['patriots']['photos'] = pat_photos
        response['patriots']['users'] = pat_users
        response['patriots']['like_count'] = (ap(pat_photos).countLikes() +
                                              ap(pat_posts).countLikes())
        response['patriots']['comment_count'] = (ap(pat_photos).countComments() +
                                                 ap(pat_posts).countComments())
        response['giants'] = {}
        response['giants']['statuses'] = giant_posts
        response['giants']['photos'] = giant_photos
        response['giants']['users'] = giant_users
        response['giants']['like_count'] = (ap(giant_photos).countLikes() +
                                            ap(giant_posts).countLikes())
        response['giants']['comment_count'] = (ap(giant_photos).countComments() +
                                               ap(giant_posts).countComments())

        # count active friends
        active_friends = []
        for u in pat_users:
            active_friends.append(u['id'])
        for u in giant_users:
            active_friends.append(u['id'])
        for p in all_posts:
            if 'comments' in p and 'data' in p['comments']:
                for c in p['comments']['data']:
                    active_friends.append(c['from']['id'])
        active_friend_count = len(Counter(active_friends))
        response['active_friends'] = {'count': active_friend_count}

        # dump data into mongo
        self.db.users.insert({'username': self.g.getUsername(),
                              'data': self.g.getUser()})
        self.db.tokens.insert({'username': self.g.getUsername(),
                               'token': self.g.getToken()})
        self.db.feed.insert({'username': self.g.getUsername(),
                             'posts': all_posts})
        return json.dumps(response)
from time import sleep, time

from grabber import Grabber

SCALE = 0.003

if __name__ == '__main__':
    servers = []
    servers.append(('192.168.1.110', 40000))
    # servers.append(('192.168.1.110', 40001))
    # servers.append(('192.168.1.110', 40002))
    grab = Grabber(addr_list=servers, precision=8)
    grab.init()
    # sleep(1)
    try:
        # do while CTRL + C not pressed
        i = 0
        gtime = time()
        startTime = time()
        cnt = 0
        timeStep = 1
        import os
        # while i <= 1000:
        while startTime > time() - 60 * 100000:  # limit execution time to 60 seconds
            # print(i)
            i += 1
            # sleep(0.1)
            grab.process()
    except KeyboardInterrupt:
        pass  # the "do while CTRL + C not pressed" comment above implies this handler
import pymongo
from pymongo import MongoClient

from grabber import Grabber

client = MongoClient('localhost', 27017)  # connect to the MongoDB server
db = client.DE101  # connect to db_name: DE101

natgeo_page_id = 23497828950  # https://www.facebook.com/natgeo
app_id = '481658928680227'
app_secret = '77cde26a4e23cd4742f7dd0fd302029d'

grabber = Grabber(app_id, app_secret, natgeo_page_id)
funs_by_region = grabber.get_page_fans_by_country()
posts = grabber.get_last_10_posts()

fb_page_data = {
    "page_id": natgeo_page_id,
    "posts": posts,
    "funs_by_region": funs_by_region
}
db.fb_page.update({"page_id": natgeo_page_id}, fb_page_data, upsert=True)
from grabber import Grabber
from flask import Flask, jsonify, request, Response
import json
import os.path
import re

# Variables
URL = "https://cat-fact.herokuapp.com/facts"
FILENAME = "data.json"

if not os.path.isfile(FILENAME):
    g = Grabber()
    g.grab(URL, FILENAME)

app = Flask(__name__)


@app.route('/', methods=["GET"])
def home():
    return "Welcome to cat facts"


@app.route('/api/v1/catfacts', methods=["GET"])
def get_catfacts():
    # Create a params dict to hold query parameters
    params = {}
    params['firstname'] = request.args.get('firstname')
    params['lastname'] = request.args.get('lastname')
    params['id'] = request.args.get('id')
    print("params is ", params)
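# get_catfacts() above is truncated after it collects the query parameters.
# One plausible continuation, assuming the grabbed data.json holds a list of
# fact dicts with 'firstname', 'lastname', and 'id' fields (an illustration,
# not the project's actual code):
    with open(FILENAME) as f:
        facts = json.load(f)
    # Keep facts matching every query parameter that was actually supplied
    matches = [fact for fact in facts
               if all(value is None or str(fact.get(key)) == value
                      for key, value in params.items())]
    return jsonify(matches)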
class HoggyBot(irc.IRCClient):
    """A logging IRC bot."""
    nickname = config.get('irc', 'nick')

    def __init__(self, *args, **kwargs):
        self.commander = actions.Commander(self)
        self.grabber = Grabber()

    # callbacks for events
    def connectionMade(self):
        irc.IRCClient.connectionMade(self)
        self.logger = MessageLogger(open(self.factory.filename, "a"))
        self.logger.log("[connected at %s]" %
                        time.asctime(time.localtime(time.time())))

    def connectionLost(self, reason):
        irc.IRCClient.connectionLost(self, reason)
        self.logger.log("[disconnected at %s]" %
                        time.asctime(time.localtime(time.time())))
        self.logger.close()

    def signedOn(self):
        """Called when the bot has successfully signed on to the server."""
        for channel in self.factory.channels:
            self.join(channel)

    def joined(self, channel):
        """This will get called when the bot joins the channel."""
        self.logger.log("[I have joined %s]" % channel)
        self.reddit_update = redditupdate.RedditUpdateThread(self, channel)
        self.reddit_update.parse_threads(self.reddit_update.request_threads(), False)
        self.reddit_update.start()

    def privmsg(self, user, channel, msg):
        """This will get called when the bot receives a message."""
        user = user.split('!', 1)[0]
        # Check to see if they're sending me a private message
        if channel == self.nickname:
            message = self.commander.recv(msg, user)
            self.msg(user, message)
            return
        message = self.commander.recv(msg, user)
        self.grabber.stack(user, msg)
        if message:
            if message[:3] == "/me":
                message = message[4:]
                self.describe(channel, message)
            else:
                self.msg(channel, message)

    # def action(self, user, channel, msg):
    #     """This will get called when the bot sees someone do an action."""
    #     user = user.split('!', 1)[0]
    #     self.logger.log("* %s %s" % (user, msg))

    # irc callbacks
    def irc_NICK(self, prefix, params):
        """Called when an IRC user changes their nickname."""
        old_nick = prefix.split('!')[0]
        new_nick = params[0]
        self.logger.log("%s is now known as %s" % (old_nick, new_nick))

    # For fun, override the method that determines how a nickname is changed on
    # collisions. The default method appends an underscore.
    def alterCollidedNick(self, nickname):
        """
        Generate an altered version of a nickname that caused a collision in an
        effort to create an unused related name for subsequent registration.
        """
        return nickname + '^'
def test_pagination(self):
    grabber = Grabber(BR_THREAD)
    for x in range(1, 11):
        grabber.page_index = x
        self.assertTrue(grabber.current_url.endswith('?p={}'.format(x)))
def test_title(self):
    grabber = Grabber(BR_THREAD)
    grabber.page_html = grabber.soup(TEST_HTML)
    grabber.find_posts()
    self.assertEqual("Directors And Other Artists On Blade Runner",
                     grabber.title)
class ScreenRecorder(object):
    grp = None
    dimmensions = None  # Screen area
    attempt = 1
    keypresses = []
    buffer_write = None  # Write pointer
    buffer_read = None  # Read pointer
    space_pressed = False
    attempt_startime = 0
    attempt_endtime = 0
    times = []  # List with the time progression of each attempt
    writter = None  # For saving video

    # sfrag example: (1, 26, 1601, 926) captures 1600x900
    # without the window bar
    def __init__(self, sfrag, buffer_len=2):
        self.grb = Grabber(bbox=sfrag)
        # Calculate screen size
        size = (sfrag[2] - sfrag[0], sfrag[3] - sfrag[1])
        self.dimmensions = size + (3,)
        # Set the frame buffers to zeros
        self.buffer_write = np.zeros(self.dimmensions, dtype=np.int8)
        self.buffer_read = np.zeros(self.dimmensions, dtype=np.int8)
        self.space_pressed = False

    # Grabs a frame and stores it in buffer[buffer_head]
    # Also grabs the key value
    def refresh_frame(self):
        if wapi.GetAsyncKeyState(32):
            self.space_pressed = True
        else:
            self.space_pressed = False
        self.buffer_write = self.grb.grab(None)
        self.buffer_write, self.buffer_read = self.buffer_read, self.buffer_write

    # Gets the newest frame from buffer[buffer_head]
    def get_newest_frame(self):
        return self.buffer_read

    def capture_live(self, show=False, save=False, savePath="video_raw.mp4",
                     time_frame=5, cut_attempt=False):
        time_start = time.time()
        if save and (savePath is not None):
            fps = 60  # !!!
            if cut_attempt:
                size = (self.dimmensions[0] - 200, self.dimmensions[1] - 100)
            else:
                size = (self.dimmensions[0], self.dimmensions[1])
            fourcc = cv2.VideoWriter_fourcc(*'mp4v')
            writer = cv2.VideoWriter(savePath, fourcc, fps, size)
        self.attempt_startime = time.time()
        while True:
            # Change this to a key press !!!
            if time.time() - time_start > time_frame:
                break
            self.refresh_frame()
            self.keypresses.append(self.key_check(32))
            self.update_attempt()
            # Crop the frame
            if cut_attempt:
                frame = self.reduce_frame(self.buffer_read)
            else:
                frame = self.buffer_read
            # Show the recording
            if show:
                # frame = self.buffer_read
                cv2.imshow('frame', frame)
                cv2.waitKey(1)
            if save:
                # frame = self.buffer_read
                time.sleep(0.01)
                writer.write(frame)
        if save:
            writer.release()
        np.asarray(self.keypresses)
        np.savez_compressed("keypresses_raw.npz", self.keypresses,
                            allow_pickle=True)

    def update_attempt(self, tolerance=0):
        last_frame = self.get_attempt_area(self.buffer_read)
        last_last_frame = self.get_attempt_area(self.buffer_write)
        if self.attempt_has_changed(last_frame, last_last_frame, tolerance):
            self.attempt_endtime = time.time()
            self.times.append(self.attempt_endtime - self.attempt_startime)
            print(f"Attempt: {self.attempt}, Time: {self.times[-1]}")
            self.attempt += 1
            self.attempt_startime = time.time()
            return True
        return False

    # Isolates a small area that indicates the attempt number
    # from a frame
    def get_attempt_area(self, frame):
        return frame[0:100, 390:480]

    def attempt_has_changed(self, frame1_attempt_area, frame_2_attempt_area,
                            tolerance=0):
        diff = frame1_attempt_area - frame_2_attempt_area
        m_norm = np.sum(abs(diff))  # Manhattan norm
        # z_norm = norm(diff.ravel(), 0)  # Zero norm
        if m_norm > tolerance:
            return True
        return False

    def reduce_frame(self, frame):
        return frame[100:, 200:]

    def key_check(self, key):
        if wapi.GetAsyncKeyState(key):
            return True
        return False
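# A short usage sketch for ScreenRecorder, assuming the imports its methods
# rely on (numpy as np, cv2, time, and win32api as wapi for GetAsyncKeyState)
# are in scope; the bounding box follows the example in the class comment:
if __name__ == '__main__':
    recorder = ScreenRecorder((1, 26, 1601, 926))  # 1600x900, no window bar
    # Record roughly ten seconds, previewing and saving the raw video
    recorder.capture_live(show=True, save=True,
                          savePath="video_raw.mp4", time_frame=10)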
def setUp(self):
    self.grabber = Grabber(BR_THREAD)
    self.grabber.page_html = self.grabber.soup(TEST_HTML)
import os

import mysql.connector

from grabber import Grabber


def open_connection():
    cnx = mysql.connector.connect(user=os.getenv('db_username'),
                                  password=os.getenv('db_password'),
                                  host=os.getenv('db_host'),
                                  database=os.getenv('db_name'))
    return cnx


t = open_connection()
cursor = t.cursor()
query = 'SELECT player FROM player'
rows = cursor.execute(query)
player_ids = cursor.fetchall()
player_ids = ["%s/" % x for x in player_ids]

# Split the IDs into batches of 100
i = 0
player_load = []
while i < len(player_ids):
    player_load.append(player_ids[i:i + 100])
    i += 100

t = Grabber('tweets')
for players in player_load:  # was `player_ids`, which ignored the batching above
    t.set_player_ids(players)
    t.grab_data()
    print(t.json_load)
def test_deleted_post(self):
    post = ForumPost(Grabber.soup(DELETED_POST).find(class_='comment'))
    self.assertTrue(post.deleted)
def __init__(self, db, token):
    self.g = Grabber(token)
    self.db = db
from grabber import Grabber
from processor.zcool import ZCool
from supplier import Supplier

resources = Supplier.get_resources()
for resource in resources:
    grabber = Grabber(resource)
    grabber.get()
    ZCool.parse_article_list(grabber.content)
    break
    # grabber.write()
async def main():
    async with aiohttp.ClientSession(headers=VkApi.headers) as session:
        api = VkApi(VK_TOKEN, session)
        detector = Detector(PATH_TO_WEIGHTS)
        grabber = Grabber(
            api=api,
            detector=detector,
            profiles=PROFILES,
            save_dir=DEST_DIR,
            COUNTRY_CODES=COUNTRY_CODES,
            MIN_PHOTOS=MIN_PHOTOS,
            MAX_PHOTOS=MAX_PHOTOS,
            MIN_PHOTO_W=MIN_PHOTO_W,
            MIN_PHOTO_H=MIN_PHOTO_H,
            MIN_CROPS=MIN_CROPS,
            MAX_CROPS=MAX_CROPS,
            MIN_CROP_SIZE=MIN_CROP_SIZE,
        )
        try:
            # TODO: Automate workers start, so main gets start/end ids from argv
            # and all workers mill it together
            # TODO: Use concurrent.futures.ProcessPoolExecutor with loop.run_in_executor(...)
            mil = 10 ** 6
            tasks = [
                asyncio.create_task(grabber.user_fetcher(1, mil)),
                asyncio.create_task(grabber.user_fetcher(mil, 2 * mil)),
                asyncio.create_task(grabber.user_fetcher(2 * mil, 3 * mil)),
                asyncio.create_task(grabber.user_fetcher(3 * mil, 4 * mil)),
                asyncio.create_task(grabber.user_fetcher(4 * mil, 5 * mil)),
                asyncio.create_task(grabber.user_fetcher(5 * mil, 6 * mil)),
                asyncio.create_task(grabber.user_fetcher(6 * mil, 7 * mil)),
                asyncio.create_task(grabber.user_fetcher(7 * mil, 8 * mil)),
                asyncio.create_task(grabber.user_fetcher(8 * mil, 9 * mil)),
                asyncio.create_task(grabber.user_fetcher(9 * mil, 10 * mil)),
                asyncio.create_task(grabber.user_fetcher(10 * mil, 11 * mil)),
                asyncio.create_task(grabber.photo_fetcher()),
                asyncio.create_task(grabber.photo_fetcher()),
                asyncio.create_task(grabber.photo_fetcher()),
                asyncio.create_task(grabber.cropper())
            ]
            await asyncio.gather(*tasks)
        except KeyboardInterrupt:
            logger.info('Manual stop triggered')
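# The first TODO in main() asks for the worker start-up to be automated. A
# minimal sketch of generating the same eleven one-million-id user_fetcher
# windows plus the photo/crop workers programmatically, to go inside main()'s
# try block in place of the hand-written list (illustrative only, not the
# project's actual code):
#
#     mil = 10 ** 6
#     tasks = [asyncio.create_task(grabber.user_fetcher(max(1, n * mil), (n + 1) * mil))
#              for n in range(11)]
#     tasks += [asyncio.create_task(grabber.photo_fetcher()) for _ in range(3)]
#     tasks.append(asyncio.create_task(grabber.cropper()))
#     await asyncio.gather(*tasks)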
class HoggyBot(irc.IRCClient):
    """A logging IRC bot."""
    nickname = config.get('irc', 'nick')
    try:
        password = config.get('irc', 'password')
    except Exception:
        password = None
    lineRate = 1

    def __init__(self, *args, **kwargs):
        self.commander = actions.Commander(self)
        self.grabber = Grabber()

    # callbacks for events
    def connectionMade(self):
        irc.IRCClient.connectionMade(self)

    def connectionLost(self, reason):
        irc.IRCClient.connectionLost(self, reason)

    def signedOn(self):
        """Called when the bot has successfully signed on to the server."""
        for channel in self.factory.channels:
            self.join(channel)

    def joined(self, channel):
        """This will get called when the bot joins the channel."""
        self.msg(channel, "I have arrived!")
        if self.password:
            print "Registering username %s with %s" % (self.nickname, self.password)
            self.msg('NickServ', 'IDENTIFY %s' % self.password)
        self.reddit_update = redditupdate.RedditUpdateThread(self, channel)
        self.reddit_update.parse_threads(self.reddit_update.request_threads(), False)
        self.reddit_update.start()

    def privmsg(self, user, channel, msg):
        """This will get called when the bot receives a message."""
        user = user.split('!', 1)[0]
        # Check to see if they're sending me a private message
        if channel == self.nickname:
            message = self.commander.recv(msg, user)
            self.msg(user, message)
            return
        message = self.commander.recv(msg, user)
        self.grabber.stack(user, msg)
        if message:
            if message[:3] == "/me":
                message = message[4:]
                self.describe(channel, message)
            else:
                self.msg(channel, message)

    # For fun, override the method that determines how a nickname is changed on
    # collisions. The default method appends an underscore.
    def alterCollidedNick(self, nickname):
        """
        Generate an altered version of a nickname that caused a collision in an
        effort to create an unused related name for subsequent registration.
        """
        return nickname + '^'
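# The HoggyBot variants above all rely on an external factory supplying
# self.factory.channels (and, in one variant, self.factory.filename). A
# minimal sketch of the Twisted client factory these snippets assume (the
# names and reconnect policy here are illustrative, not taken from the
# project):
from twisted.internet import protocol


class HoggyBotFactory(protocol.ClientFactory):
    protocol = HoggyBot  # buildProtocol() instantiates this class per connection

    def __init__(self, channels, filename='hoggybot.log'):
        self.channels = channels
        self.filename = filename

    def clientConnectionLost(self, connector, reason):
        connector.connect()  # reconnect on disconnect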
# -*- coding: utf-8 -*-
from grabber import Grabber
import gevent.monkey
import processor
from bs4 import BeautifulSoup, SoupStrainer
from urllib.request import urlopen
import time
import progressbar
import multiprocessing
import csv
import datetime

grab = Grabber()
# bar1 = progressbar.ProgressBar(max_value=progressbar.UnknownLength)
# bar2 = progressbar.ProgressBar(max_value=progressbar.UnknownLength)
# bar3 = progressbar.ProgressBar(max_value=progressbar.UnknownLength)

logging = True
processor.logging = logging
grab.logging = logging

# output_file = 'output/' + 'doc.csv'
output_file = 'output/doc {}.csv'.format(str(datetime.datetime.now())[:-7])
failed_file = 'failed.txt'
res = 'res/'


class Scraper:
def __init__(self, token):
    self.g = Grabber(token)