def get_jobs_for_all_instances():
    """Collect the job listings from every crawler instance into one dict."""
    instance_list = ec2.getCrawlerInstances()
    job_dict = {}
    for i in instance_list:
        try:
            job_dict.update(get_jobs_for_instance(i.id))
        except Exception:
            # Skip instances that are unreachable or not yet running scrapyd.
            pass
    return job_dict
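
# A minimal sketch of the per-instance helper used above, assuming it queries
# scrapyd's listjobs.json API on port 6800 (the port deploy() configures below)
# for the 'deployable' project. The helper name and project name come from this
# code base; the {job_id: state} return shape is an assumption.
import json
import urllib2

def get_jobs_for_instance(instance_id):
    instance = ec2.getInstanceFromInstanceName(instance_id)
    url = 'http://%s:6800/listjobs.json?project=deployable' % instance.ip_address
    data = json.loads(urllib2.urlopen(url, timeout=10).read())
    jobs = {}
    for state in ('pending', 'running', 'finished'):
        for job in data.get(state, []):
            jobs[job['id']] = state
    return jobs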
def __init__(self, *args, **kwargs):
    super(SiteForm, self).__init__(*args, **kwargs)
    running_limit = config_file.get_config().get('bblio', 'crawler_instance_site_limit')
    instance_list = []
    # Note: this static list is not used below; the grouping choices are read
    # from the 'bblio'/'grouping' config entry instead.
    grouping_list = [('works', 'works'), ('works dirty', 'works dirty'), ('WIP', 'WIP'),
                     ('error', 'error'), ('condemned', 'condemned'), ('start', 'start'),
                     ('test', 'test')]
    try:
        for i in getCrawlerInstances():
            status_counts = get_job_status_count_for_instance(i.id)
            count = int(status_counts['pending']) + int(status_counts['running'])
            instance_list.append({
                'name': i.id,
                'choice_name': i.id + ' ' + str(count) + '/' + str(running_limit),
            })
    except Exception:
        # If the crawler instances cannot be queried, offer only the empty choice.
        pass
    instance_list.append({'name': '', 'choice_name': ''})
    # Use a list, not a generator, so the choices survive repeated rendering.
    instance_choices = [(i['name'], i['choice_name']) for i in instance_list]
    self.fields['instance'].widget = Select(
        attrs={'class': 'form-control input-sm'}, choices=instance_choices)
    self.fields['jurisdiction'].widget = Select(
        attrs={'class': 'form-control input-sm'}, choices=get_country_list())
    self.fields['owner'].widget = Select(
        attrs={'class': 'form-control input-sm'},
        choices=[(o, o) for o in config_file.get_config().get('bblio', 'owners').split(';')])
    self.fields['grouping'].widget = Select(
        attrs={'class': 'form-control input-sm'},
        choices=[(g, g) for g in config_file.get_config().get('bblio', 'grouping').split(';')])
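
# A minimal sketch of get_job_status_count_for_instance(), which the form above
# expects to expose at least 'pending' and 'running' counts. Assuming it can be
# derived from the {job_id: state} mapping sketched after
# get_jobs_for_all_instances() above; the real implementation may query scrapyd
# directly instead.
def get_job_status_count_for_instance(instance_id):
    counts = {'pending': 0, 'running': 0, 'finished': 0}
    for state in get_jobs_for_instance(instance_id).values():
        counts[state] = counts.get(state, 0) + 1
    return counts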
import json
import os
import sys
from subprocess import Popen, PIPE

from boto.manage.cmdshell import sshclient_from_instance

import ec2    # project-local EC2 helpers (aws/ec2.py)
import keys   # project-local key paths (aws/keys.py)


def deploy():
    ret = None
    for i in ec2.getCrawlerInstances():
        if not i.ip_address:
            continue
        print "[%s] %s" % (i.id, i.ip_address)
        ssh_client = sshclient_from_instance(
            ec2.getInstanceFromInstanceName(i.id),
            host_key_file='/home/ec2-user/.ssh/known_hosts',
            ssh_key_file=keys.aws_pem,
            user_name='ec2-user')
        ssh_client.put_file('/home/ec2-user/bblio/scraper/scrapyd.conf',
                            '/home/ec2-user/scrapyd.conf')

        home_dir = '/home/ec2-user/bblio/'
        copyList = [
            home_dir + 'build/search/models.py',
            home_dir + 'build/search/__init__.py',
            home_dir + 'build/Build/__init__.py',
            home_dir + 'build/Build/settings.py.crawler',
            home_dir + 'build/Build/myScript.py.crawler',
            home_dir + 'build/manage.py',
            home_dir + 'build/__init__.py',
            home_dir + 'aws/ec2.py',
            home_dir + 'aws/keys.py',
            home_dir + 'aws/key.pem',
            home_dir + 'aws/__init__.py',
            home_dir + 'config_file.py',
            home_dir + '__init__.py',
        ]

        # Collect every remote directory the copied files need, then create
        # them shortest path first so parents exist before their children.
        dirList = []
        for c in copyList:
            c_dir = os.path.dirname(c)
            prev_dir = ''
            while c_dir != prev_dir and c_dir not in home_dir:
                if c_dir not in dirList:
                    dirList.append(c_dir)
                prev_dir = c_dir
                c_dir = os.path.dirname(c_dir)
        dirList.append(home_dir)
        dirList.sort(lambda x, y: cmp(len(x), len(y)))
        for d in dirList:
            print('[dir][%s] %s' % (ssh_client.server.instance_id, d))
            ssh_client.run('mkdir %s' % d)

        for c in copyList:
            print('[file][%s] %s' % (ssh_client.server.instance_id, c))
            ssh_client.put_file(c, c.replace('.crawler', ''))

        # Point scrapy.cfg at this instance's scrapyd endpoint before deploying.
        with open('/home/ec2-user/bblio/scraper/deployable/scrapy.cfg', 'w') as f:
            f.write(
                "[settings]\n"
                "default = deployable.settings\n"
                "\n"
                "[deploy]\n"
                "project = deployable\n")
            f.write('url = http://%s:6800\n' % i.ip_address)
        print i.ip_address

        p = Popen(['scrapyd-deploy'], stdout=PIPE, shell=True,
                  cwd='/home/ec2-user/bblio/scraper/deployable')
        j = None
        while True:
            out = p.stdout.read()
            if out == '' and p.poll() is not None:
                break
            if out != '':
                if '{' in out:
                    j = json.loads(out)
                sys.stdout.write(out)
                sys.stdout.flush()
        #if j['status'] != 'ok':
        #    ret = ret + str(i.ip_address) + ' failed\n'
    return ret
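
# Hypothetical entry point: running the script directly would push the
# 'deployable' project to every crawler instance. The __main__ guard is an
# assumption; the original may call deploy() from elsewhere.
if __name__ == '__main__':
    failures = deploy()
    if failures:
        print failures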