import logging
import shlex
import traceback
from collections import defaultdict
from subprocess import Popen, PIPE

import numpy as np

_logger = logging.getLogger(__name__)


def __getitem__(self, index):
    hdfs_part = ("%s/" + self.path_fmt) % (self.path_base, index)
    _logger.debug("get sample from hdfs: " + hdfs_part)
    try:
        # shlex.split turns the command string into an argv list for Popen.
        args = shlex.split("hadoop fs -cat " + hdfs_part)
        # text mode so the split(":") calls below work on str, not bytes
        pp = Popen(args, stdout=PIPE, universal_newlines=True)
        pp.wait(self.timeout)
        batch_inputs = []
        targets = []
        for line in pp.stdout:
            seg = line.strip().split()
            if len(seg) < 2:
                continue
            ids = defaultdict(list)
            p = seg[0].split(":")
            y = float(p[0])
            next_item = p[1]  # the next video (item) the user will watch
            targets.append(y)
            for p in seg[1:]:  # all of these are one-hot features
                feature = p.split(":")[0]
                grp = self._get_feature_group(feature)
                ids[grp].append(self._get_feature_id(feature))
            # Register the target item before building `inputs`; the original
            # appended it afterwards, so the "-0" group never reached the batch.
            grp = self._get_feature_group(next_item)
            ids[grp + "-0"].append(self._get_feature_id(next_item))
            inputs = {}
            for grp in ids.keys():
                inputs[grp] = np.asarray(ids[grp], dtype='int64')
            batch_inputs.append(inputs)
        pp.stdout.close()
        return batch_inputs, targets
    except Exception:
        traceback.print_exc()
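# Why shlex.split above: it turns a command string into the argv list Popen
# expects while keeping quoted segments intact. A minimal sketch with a
# hypothetical HDFS path:
import shlex

print(shlex.split('hadoop fs -cat "/data/samples/part-00000"'))
# -> ['hadoop', 'fs', '-cat', '/data/samples/part-00000']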
import copy
import re
from shlex import shlex


def __init__(self, query, domain=None, doCorrect=False):
    q = copy.deepcopy(query)
    # replace '/' with '#' before handing the query to the lexer
    self.q = re.sub("/", "#", q)
    self.tokens = list(shlex(self.q))
    self.noOfTokens = len(self.tokens)
    self.parse(self.tokens, doCorrect=doCorrect)
    if domain:
        self.insertDomain(domain)
from shlex import shlex


def test_split(s, comments=False, posix=True):
    # Python 2 helper: coerce str to unicode for lexing, then convert the
    # tokens back afterwards.
    is_str = False
    if type(s) is str:
        s = unicode(s)
        is_str = True
    lex = shlex(s, posix=posix)
    lex.whitespace_split = True
    if not comments:
        lex.commenters = ''
    if is_str:
        return [str(x) for x in list(lex)]
    else:
        return list(lex)
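# Usage sketch for test_split (Python 2 assumed, matching unicode() above):
# whitespace_split makes shlex behave like a quote-aware str.split, and
# clearing commenters keeps '#' as a literal token.
print(test_split('a "b c" d'))  # -> ['a', 'b c', 'd']
print(test_split('a # b'))      # -> ['a', '#', 'b'] (comments disabled by default)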
from shlex import shlex


def load_cases(filename):
    """Read whitespace-separated test cases: a case count, then for each
    case a length followed by that many integers (Python 2: xrange)."""
    cases = []
    file = open(filename)
    stream = shlex(file)  # shlex accepts a file object directly
    num_cases = int(stream.get_token())
    for i in xrange(0, num_cases):
        num_t = int(stream.get_token())
        cur_case = []
        for t in xrange(0, num_t):
            cur_case.append(int(stream.get_token()))
        cases.append(cur_case)
    file.close()
    return cases
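# Self-contained demo of the format load_cases expects (hypothetical file
# name): first token is the number of cases, then each case is a count
# followed by that many integers; shlex ignores the line layout entirely.
with open('cases.txt', 'w') as demo:
    demo.write('2\n3 1 2 3\n2 10 20\n')
print(load_cases('cases.txt'))  # -> [[1, 2, 3], [10, 20]]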
from shlex import shlex


def _parseLine(self, line=None):
    # `term` is an external output stream from the surrounding module.
    if line is None:
        line = self._getLineStr()
    result = []
    parser = shlex(line)
    parser.whitespace_split = True
    while True:
        try:
            token = parser.get_token()
        except ValueError, ve:  # Python 2 syntax; e.g. "No closing quotation"
            term.write(str(ve) + '\n')
            result = []
            break
        else:
            if token == parser.eof:
                break
            result.append(token)
    return result  # the original snippet broke off here; returning the tokens is the natural ending
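# Sketch of the failure mode the try/except above guards against: shlex
# raises ValueError when a quote is never closed.
from shlex import shlex

lex = shlex('echo "unterminated')
lex.whitespace_split = True
try:
    while True:
        token = lex.get_token()
        if token == lex.eof:
            break
        print(token)  # prints 'echo', then the next call raises
except ValueError as ve:
    print(ve)  # -> No closing quotation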
from shlex import shlex


def __init__(self, fN=None, query_list=None):
    if fN is not None:
        Q = open(fN, 'r')
        QUERY = False
        lines = Q.readlines()
        Q.close()
        tmp_query = ""
        for l in lines:
            for token in list(shlex(l)):
                if token in START_TOKENS:
                    # a new query begins
                    QUERY = True
                    tmp_query = token
                elif token in END_TOKENS:
                    # the query is complete; store it
                    tmp_query += ' ' + token
                    self.append(Query(tmp_query))
                    QUERY = False
                elif QUERY:
                    tmp_query += ' ' + token
                else:
                    pass
            # keep the line break if a query continues onto the next line
            if QUERY:
                tmp_query += '\n'
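# Hypothetical delimiter sets for the scanner above (assumptions, not from
# the original source, where START_TOKENS/END_TOKENS are defined elsewhere):
# queries start at SQL-like keywords and end at ';', which non-posix shlex
# returns as a single-character token.
START_TOKENS = {'SELECT', 'INSERT', 'UPDATE', 'DELETE'}
END_TOKENS = {';'}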
import datetime
from shlex import shlex


def cached_shlex(request, s):
    # Tell clients to cache the tokenization result for a year.
    future = datetime.datetime.today() + datetime.timedelta(days=365)
    return JsonRpcResponse(
        # The original called shlex(request, response, s) with an undefined
        # `response`; tokenizing `s` is the assumed intent here.
        list(shlex(s)),
        headers={"Expires": future.strftime("%a, %d %b %Y %H:%M:%S GMT")}
    )
parser.add_option("-o", dest="outputFile", help="name of the output file") options, args = parser.parse_args() if len(args) < 3: parser.error("not enough input files") sys.exit(0) protein = args[0] solvent = args[1] top = args[2] if not os.path.exists(protein): print "Error: protein gro file was not found" sys.exit(0) if not os.path.exists(solvent): print "Error: solvent file was not found" sys.exit(0) if not os.path.exists(top): print "Error: topology file was not found" sys.exit(0) command="genbox -cp %s -cs %s -p %s -o" % (protein, solvent, top, parser.outputFile) code = subprocess.Popen(shlex(command).split) if code: print "Info: Finished with code", code
import shlex
import subprocess


def scan_world():
    command = ("zmap --bandwidth=10M --target-port=80 "
               "--max-targets=10000 --output-file=results.txt")
    # shlex.split builds the argv list; bare shlex(command) would hand
    # check_output a lexer object instead of arguments.
    execute = shlex.split(command)
    subprocess.check_output(execute)
# Fragment: `soft`, `interface`, and `suricata_data` are set earlier in the
# original script; shlex and subprocess are imported at the top of it.
if soft == 'suricata':
    # point the init script at the capture interface
    f = open('/etc/sysconfig/suricata', 'w')
    f.write('OPTIONS="-i ' + interface + '"')  # the original closed the quote before the interface name
    f.close()
    # make load-rules script
    f = open('/etc/suricata/load-rules', 'w')
    f.write('#!/bin/bash\n\n'
            'rm -f /etc/suricata/json.rules\n'
            'touch /etc/suricata/json.rules\n'
            'for item in /etc/suricata/rules/*.rules; do echo " - $(basename $item)" >> /etc/suricata/json.rules; done\n'
            'sudo cat /etc/suricata/suricata.yaml > /etc/suricata/suricata.yaml.back\n'
            'sudo cat /etc/suricata/suricata.yaml | grep \'\\.rules\' -v | sed \'/rule-files:/ r /etc/suricata/json.rules\' > /etc/suricata/temp.rules\n'
            'sudo cat /etc/suricata/temp.rules > /etc/suricata/suricata.yaml\n'
            'rm -f json.rules\n'
            'rm -f temp.rules')
    f.close()
    subprocess.call(shlex.split('sudo chmod 755 /etc/suricata/load-rules'))
    # load current rules
    subprocess.call(shlex.split('sudo /etc/suricata/load-rules'))
    # make suricata start on boot
    subprocess.call(shlex.split('sudo chkconfig suricata on'))
    # mkdir for eve.json
    orig_file = []
    subprocess.call(shlex.split('sudo mkdir ' + suricata_data))  # shlex.split, not shlex()
    # change default data dir for suricata
    f = open('/etc/suricata/suricata.yaml', 'r')
    for line in f:
        orig_file.append(line)
    f.close()
    f = open('/etc/suricata/suricata.yaml', 'w')
    for line in orig_file:
        if 'default-log-dir' in line:
            f.write('default-log-dir: ' + suricata_data)
        else:
            f.write(line)
    f.close()
if soft == 'netsniff-ng':
    # write configuration file
    f = open('/etc/sysconfig/netsniff-ng', 'w')
# Fragment of a start-hook runner script; `hook`, `log`, `logging`, `line`,
# and the if-statement that the `else:` below closes come from earlier in
# the original file.
        if logging is True:
            log.write(line)
    hook.wait()
    if os.path.exists('environment.import'):
        # pull KEY=value pairs emitted by the hook into this environment
        environment_import = open('environment.import', 'r')
        for variable in environment_import.readlines():
            (key, value) = variable.split('=', 1)
            os.environ[key.strip()] = value.strip()
        environment_import.close()
    if logging is True:
        log.write('\n\nStart Hook Return Code: %s\n' % (hook.returncode))
else:
    if logging is True:
        log.write('\n\nNo Start Hook Found\n')
# shlex.split builds a proper argv list; the original interpolated a shlex
# lexer object into an os.system() string, which would hand its repr() to
# the shell instead of the arguments.
subprocess.call([os.environ['PAS_EXECUTABLE']] + shlex.split(os.environ['PAS_ARGUMENTS']))
log.close()
sys.stdout.flush()
sys.exit(0)
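# Minimal sketch of the env-driven launch above, with hypothetical values:
# shlex.split turns the flat PAS_ARGUMENTS string into argv elements while
# keeping quoted arguments whole.
import os
import shlex
import subprocess

os.environ['PAS_EXECUTABLE'] = '/bin/echo'
os.environ['PAS_ARGUMENTS'] = '--mode "two words"'
argv = [os.environ['PAS_EXECUTABLE']] + shlex.split(os.environ['PAS_ARGUMENTS'])
subprocess.call(argv)  # prints: --mode two words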
import shlex
import subprocess

# configure() reads its settings (host, domain, interface, bro_manager,
# bro_cores, bro_logs, suricata_data, netsniff_*, logstash_*, elasticsearch_*)
# from module-level variables defined earlier in the original script.


def configure(soft):
    # set hostname (the original called `sethostname`, which is not a standard command)
    subprocess.call(shlex.split('sudo hostnamectl set-hostname ' + host + '.' + domain))
    # configure installed software
    # NOTE: pinning multiple bro cores still needs to be figured out;
    # Bro likes to pin physical, not virtual, cores.
    if soft == 'bro':
        # make bro write json files
        f = open('/opt/bro/share/bro/site/scripts/json-logs.bro', 'w')
        f.write('@load tuning/json-logs\n\n'
                'redef LogAscii::json_timestamps = JSON::TS_ISO8601;\n'
                'redef LogAscii::use_json = T;')
        f.close()
        f = open('/opt/bro/share/bro/site/local.bro', 'a')
        f.write('@load scripts/json-logs')
        f.close()
        subprocess.call(shlex.split('sudo /opt/bro/bin/broctl install'))
        subprocess.call(shlex.split('sudo /opt/bro/bin/broctl stop'))
        # configure node.cfg
        f = open('/opt/bro/etc/node.cfg', 'w')
        # bro core list needs to be defined
        import multiprocessing
        virtual_cpus = multiprocessing.cpu_count()
        physical_cpus = virtual_cpus // 2
        cpu_ids = []
        for i in range(physical_cpus):  # the original iterated the int itself
            # TODO: check whether there really are n physical cores to assign,
            # then add each cpu_id to the list, e.g. cpu_ids.append(i).
            # `grep -E 'processor|core.id' /proc/cpuinfo | xargs -L 2`
            # returns a list of processor and core ids.
            pass
        # TODO: bro core list code here (cpu_ids must be populated above,
        # otherwise the indexing below fails)
        f.write('[manager]\ntype=manager\nhost=' + bro_manager +
                '\npin_cpus=' + str(cpu_ids[0]) +
                '\n\n[proxy-1]\ntype=proxy\nhost=' + host +
                '\n\n[monitor]\ntype=worker\nhost=' + host +
                '\ninterface=' + interface +
                '\nlb_method=pf_ring\nlb_procs=' + bro_cores +
                '\npin_cpus=' + str(cpu_ids[1:]))
        f.close()
        # configure broctl.cfg
        orig_file = []
        f = open('/opt/bro/etc/broctl.cfg', 'r')
        for line in f:
            orig_file.append(line)
        f.close()
        f = open('/opt/bro/etc/broctl.cfg', 'w')
        for line in orig_file:
            if 'LogDir' in line:
                f.write('LogDir = ' + bro_logs)
            else:
                f.write(line)
        f.close()
        # make broctl start on boot
        subprocess.call(shlex.split('sudo ln -s /opt/bro/bin/broctl /etc/init.d/'))
        subprocess.call(shlex.split('sudo service broctl deploy'))
        # mkdir for logs
        subprocess.call(shlex.split('sudo mkdir -p ' + bro_logs))
        subprocess.call(shlex.split('sudo chmod 744 -R ' + bro_logs))
    if soft == 'suricata':
        # make load-rules script; the original dropped the "$(" of the
        # $(basename $item) substitution
        f = open('/etc/suricata/load-rules', 'w')
        f.write('#!/bin/bash\n\n'
                'rm -f /etc/suricata/json.rules\n'
                'touch /etc/suricata/json.rules\n'
                'for item in /etc/suricata/rules/*.rules; do echo " - $(basename $item)" >> /etc/suricata/json.rules; done\n'
                'sudo cat /etc/suricata/suricata.yaml > /etc/suricata/suricata.yaml.back\n'
                'sudo cat /etc/suricata/suricata.yaml | grep \'\\.rules\' -v | sed \'/rule-files:$/ r /etc/suricata/json.rules\' > /etc/suricata/temp.rules\n'
                'sudo cat /etc/suricata/temp.rules > /etc/suricata/suricata.yaml\n'
                'rm -f json.rules\n'
                'rm -f temp.rules')
        f.close()
        subprocess.call(shlex.split('sudo chmod 755 /etc/suricata/load-rules'))
        # load current rules
        subprocess.call(shlex.split('sudo /etc/suricata/load-rules'))
        # make suricata start on boot
        subprocess.call(shlex.split('sudo chkconfig suricata on'))
        # mkdir for eve.json
        orig_file = []
        subprocess.call(shlex.split('sudo mkdir ' + suricata_data))  # shlex.split, not shlex()
        # change default data dir for suricata
        f = open('/etc/suricata/suricata.yaml', 'r')
        for line in f:
            orig_file.append(line)
        f.close()
        f = open('/etc/suricata/suricata.yaml', 'w')
        for line in orig_file:
            if 'default-log-dir' in line:  # suricata.yaml key; the original looked for 'default-logs-dir'
                f.write('default-log-dir: ' + suricata_data)
            else:
                f.write(line)
        f.close()
    if soft == 'netsniff-ng':
        # write configuration file
        f = open('/etc/sysconfig/netsniff-ng', 'w')
        f.write('PROM_INTERFACE=' + interface + '\nUSER=nobody\nGROUP=nobody\n'
                'INTERVAL=' + netsniff_interval + '\nDATA_DIR=' + netsniff_output)
        f.close()
        # make netsniff-ng service file
        f = open('/etc/systemd/system/netsniff-ng.service', 'w')
        f.write('[Unit]\nDescription=PCAP Collection Beast\nAfter=network.target\n\n'
                '[Service]\nEnvironmentFile=/etc/sysconfig/netsniff-ng\n'
                'ExecStart=/sbin/netsniff-ng --in ${PROM_INTERFACE} --out ${DATA_DIR} --silent --user ${USER} --group ${GROUP} --interval ${INTERVAL}\n'
                'Type=simple\n\n[Install]\nWantedBy=multi-user.target')
        f.close()
        # mkdir for pcap storage; should add a check for interface vs. dir,
        # but the current use case results in a directory 99% of the time
        subprocess.call(shlex.split('sudo mkdir -p ' + netsniff_output))
        subprocess.call(shlex.split('sudo chown nobody:nobody ' + netsniff_output))
    if soft == 'logstash':
        # these flags should be set dynamically before getting here
        if logstash_bro_elasticsearch or logstash_suricata_elasticsearch:
            # setup logstash to es
            if logstash_bro_elasticsearch:
                # TODO: bro to elasticsearch config file
                pass
            if logstash_suricata_elasticsearch:
                # TODO: suricata to elasticsearch config file
                pass
        elif logstash_bro_kafka != '' or logstash_suricata_kafka != '':
            # setup logstash to kafka
            if logstash_bro_kafka != '':
                # TODO: bro to kafka config file (the original comment said elasticsearch)
                pass
            if logstash_suricata_kafka != '':
                # TODO: suricata to kafka config file
                pass
        elif logstash_kafka_elasticsearch:
            if logstash_kafka_elasticsearch_only:
                # TODO: kafka to es config file
                pass
            else:
                # TODO: bro/suricata -> kafka -> elasticsearch
                pass
    if soft == 'elasticsearch':
        # configure yml file
        f = open('/etc/elasticsearch/elasticsearch.yml', 'w')
        f.write('node.name: ' + elasticsearch_node_name + '\n')
        f.write('cluster.name: ' + elasticsearch_cluster_name + '\n')
        f.write('index.number_of_shards: ' + str(elasticsearch_shards) + '\n')
        f.write('index.number_of_replicas: ' + str(elasticsearch_replicas) + '\n')
        f.write('path.data: ' + elasticsearch_path_data + '\n')
        f.write('path.logs: ' + elasticsearch_path_logs + '\n')
        f.write('path.plugins: ' + elasticsearch_path_plugins + '\n')
        f.write('path.work: ' + elasticsearch_path_work + '\n')
        # unicast/master discovery: build a formatted list of hosts
        temp = '['
        for i in elasticsearch_master_discovery:
            temp += '"' + i.split(',')[0] + '",'
        temp = temp[:-1]  # remove the trailing comma
        temp += ']'
        f.write('discovery.zen.ping.unicast.hosts: ' + temp + '\n')
        f.write('node.master: ' + str(elasticsearch_master_node).lower() + '\n')
        f.write('node.data: ' + str(elasticsearch_data_node).lower() + '\n')
        f.close()
        # configure heap
        orig_file = []
        f = open('/etc/sysconfig/elasticsearch', 'r')
        for line in f:
            orig_file.append(line)
        f.close()
        f = open('/etc/sysconfig/elasticsearch', 'w')
        for line in orig_file:
            if 'ES_HEAP_SIZE' in line:
                f.write('ES_HEAP_SIZE=' + str(elasticsearch_heap) + 'g')
            else:
                f.write(line)
        f.close()
        # mkdirs for paths
        subprocess.call(shlex.split('sudo mkdir -p ' + elasticsearch_path_data))
        subprocess.call(shlex.split('sudo mkdir -p ' + elasticsearch_path_work))
        subprocess.call(shlex.split('sudo mkdir -p ' + elasticsearch_path_logs))
    if soft == 'kibana':
        # still looking into a possible solution
        pass
    if soft == 'pfring':
        # create ifup-local script
        f = open('/sbin/ifup-local', 'w')
        f.write('#!/bin/bash\n\ncase "$1" in\np1p2)\n\techo "turning off offloading on $1"\n'
                '\t/sbin/ethtool -K $1 tso off gro off lro off gso off rx off tx off sg off rxvlan off txvlan off\n'
                '\tethtool -N $1 rx-flow-hash udp4 sdfn\n\tethtool -N $1 rx-flow-hash udp6 sdfn\n'
                '\tethtool -C $1 adaptive-rx off\n\tethtool -C $1 rx-usecs 1000\n\tethtool -G $1 rx 4096\n'
                '\n;;\n*)\n;;\nesac\nexit 0')
        f.close()
        subprocess.call(shlex.split('sudo chmod 755 /sbin/ifup-local'))
        # configure interface
        subprocess.call(shlex.split('sudo /sbin/ifup-local ' + interface))
    if soft == 'nginx':
        # configure the kibana nginx proxy
        # TODO: confirm config file syntax
        f = open('/etc/nginx/conf.d/kibana.conf', 'w')
        f.write('server {\n\tlisten 80;\n\tserver_name kibana;\n\tauth_gss off;\n'
                '\tauth_gss_keytab /etc/nginx/ipa.keytab;\n\n'
                '\tlocation / {\n\t\tproxy_pass http://localhost:5601;\n\t\tproxy_http_version 1.1;\n'
                '\t\tproxy_set_header upgrade $http_upgrade;\n\t\tproxy_set_header connection \'upgrade\';\n'
                '\t\tproxy_set_header host $host;\n\t\tproxy_cache_bypass $http_upgrade;\n\t}\n}')
        f.close()
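# configure() repeats subprocess.call(shlex.split(...)) throughout; a small
# helper (hypothetical, not in the original) names the pattern and surfaces
# non-zero exit codes:
import shlex
import subprocess

def run(cmd):
    """Run a shell-style command string without invoking a shell."""
    rc = subprocess.call(shlex.split(cmd))
    if rc != 0:
        print('command failed (%d): %s' % (rc, cmd))
    return rc

run('sudo mkdir -p /tmp/example-logs')  # hypothetical path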