def create_index(index):
    status = check_index(index)
    if not status:
        create_index_util(index)
        log("Index Created in main")
    get_index_mapping(index)
def final_optimised_model(self, model, _train, y, epochs=None, batch_size=None):
    try:
        # fit the keras model on the dataset
        model.fit(_train, y, epochs=epochs, batch_size=batch_size)
        # evaluate the keras model
        _, accuracy = model.evaluate(_train, y)
        print('Accuracy: %.2f' % (accuracy * 100))
        # make class predictions with the model
        # predictions_2 = model.predict_classes(_train)
        time.sleep(10)
        try:
            # serialize model architecture to JSON
            model_json = model.to_json()
            with open("model.json", "w") as json_file:
                json_file.write(model_json)
            # serialize weights to HDF5
            model.save_weights("model.h5")
            time.sleep(100)
        except Exception as e:
            log('----------Error in Model.json ----------:{}'.format(e), 'error')
            raise e
        # if 'model.json' and 'model.h5': # load json and create model
        print("End of the model running")
    except Exception as e:
        log('----------Error in final_optimised_model ----------:{}'.format(e), 'error')
        raise e
    # return the fitted model so optimized_model() gets a usable object
    return model
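# The commented hint above ("load json and create model") suggests the saved
# model is meant to be reloaded later. A minimal sketch of that step, assuming
# the same Keras version that wrote model.json/model.h5 is installed;
# load_saved_model is a hypothetical helper name:
def load_saved_model():
    from keras.models import model_from_json
    with open("model.json", "r") as json_file:
        loaded = model_from_json(json_file.read())  # rebuild the architecture
    loaded.load_weights("model.h5")                 # restore trained weights
    # a reloaded model must be compiled again before evaluate()/predict()
    loaded.compile(loss='sparse_categorical_crossentropy', optimizer='adam',
                   metrics=['accuracy'])
    return loaded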
def data_processing(self, data):
    try:
        data[self.textcolumn] = data[self.textcolumn].apply(self.text_preprocess)
        # data['content'] = self.text_preprocess(data['content'])
        data = data[data[self.engagement] > 0]
        data['engagement_bucket'] = pd.qcut(data[self.engagement],
                                            q=[0, 0.5, 0.75, 1],
                                            labels=['Low', 'Medium', 'High'])
        # Creating time related features such as time, day, etc.
        data['day'] = data[self.date].dt.day
        data['hour'] = data[self.date].dt.hour
        data['week_day'] = data[self.date].dt.weekday
        # hour = data.groupby('hour')[self.engagement].mean()
        # weekday = data.groupby('week_day')[self.engagement].mean()
        # dayofmonth = data.groupby('day')[self.engagement].mean()
        X = data[['word_count', 'hour', 'week_day']]
        X = pd.get_dummies(X, drop_first=True)
        X[self.textcolumn] = data[self.textcolumn]
        X.reset_index(drop=True, inplace=True)
        y = data['engagement_bucket']
        # y = pd.get_dummies(y)
    except Exception as e:
        log('----------Error in Data Processing ----------:{}'.format(e), 'error')
        raise e
    return X, y
def BAD_SYMBOLS_RE(self):
    try:
        BAD_SYMBOLS_RE = re.compile(r"[^0-9a-z #+_]")
    except Exception as e:
        log('----------Error in BAD_SYMBOLS_RE function ----------:{}'.format(e), 'error')
        raise e
    return BAD_SYMBOLS_RE
def balancing(self, _train, y):
    try:
        # Oversample the minority class. Note: fit_sample was renamed to
        # fit_resample in imbalanced-learn 0.4+, and the positional ratio
        # argument became the sampling_strategy keyword.
        smote = SMOTE(sampling_strategy='minority')
        _train, y = smote.fit_resample(_train, y)
    except Exception as e:
        log('----------Error in Smote ----------:{}'.format(e), 'error')
        raise e
    return _train, y
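# A hedged usage sketch for balancing(): inspect the per-class counts before
# and after oversampling with the stdlib Counter (assumes `self` is an
# instance of the surrounding class and y holds the class labels):
# from collections import Counter
# print(Counter(y))                       # e.g. imbalanced counts per class
# _train, y = self.balancing(_train, y)
# print(Counter(y))                       # minority class now matches majority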
def REPLACE_BY_SPACE_RE(self):
    try:
        REPLACE_BY_SPACE_RE = re.compile(r"[/(){}\[\]\|@,;!]")
    except Exception as e:
        log('----------Error in REPLACE_BY_SPACE_RE function ----------:{}'.format(e), 'error')
        raise e
    return REPLACE_BY_SPACE_RE
def optimized_model(self):
    try:
        model = self.model()
        loaded_model = self.final_optimised_model(model, self._train, self.label,
                                                  self.best_params['epochs'],
                                                  self.best_params['batch_size'])
    except Exception as e:
        log('----------Error in Optimized Model ----------:{}'.format(e), 'error')
        raise e
    return loaded_model
def text_preprocess(self, text):
    """
    text: a string
    return: modified initial string
    """
    try:
        negation = ["no", "nor", "not", "don", "don't", "aren", "aren't", "couldn",
                    "couldn't", "didn", "didn't", "doesn", "doesn't", "hadn", "hadn't",
                    "hasn", "hasn't", "haven", "haven't", "isn", "isn't", "mightn",
                    "mightn't", "mustn", "mustn't", "needn", "needn't", "shan", "shan't",
                    "shouldn", "shouldn't", "wasn", "wasn't", "weren", "weren't",
                    "won", "won't", "wouldn", "wouldn't"]
        stop = set(stopwords.words('english')) - set(negation)
        # Custom stopwords
        stoplist = ['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you',
                    "you're", "you've", "you'll", "you'd", 'your', 'yours', 'yourself',
                    'yourselves', 'he', 'him', 'his', 'himself', 'she', "she's", 'her',
                    'hers', 'herself', 'it', "it's", 'its', 'itself', 'they', 'them',
                    'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom',
                    'this', 'that', "that'll", 'these', 'those', 'am', 'is', 'are',
                    'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having',
                    'do', 'does', 'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if',
                    'or', 'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for',
                    'with', 'about', 'against', 'between', 'into', 'through', 'during',
                    'before', 'after', 'above', 'below', 'to', 'from', 'up', 'down',
                    'in', 'out', 'on', 'off', 'over', 'under', 'again', 'further',
                    'then', 'once', 'here', 'there', 'when', 'where', 'why', 'all',
                    'any', 'both', 'each', 'few', 'more', 'most', 'other', 'some',
                    'such', 'only', 'own', 'same', 'so', 'than', 'too', 'very', 's',
                    't', 'can', 'will', 'just', 'should', "should've", 'now', 'd',
                    'll', 'm', 'o', 're', 've', 'y', 'rt', 'qt', 'would', 'could',
                    'edushopper', 'ive', 'im', 'ebay', 'thank', 'hello', 'know',
                    'need', 'want', 'look', 'hi', 'sorry', 'http', 'body', 'dear',
                    'thanks', 'sir', 'tomorrow', 'sent', 'send', 'see', 'welcome',
                    'well', 'us']
        stop.update(set(stoplist))
        REPLACE_BY_SPACE_RE = re.compile(r"[/(){}\[\]\|@,;!]")
        BAD_SYMBOLS_RE = re.compile(r"[^0-9a-z #+_]")
        text = re.sub(r'\d', '', str(text))  # removing digits
        text = re.sub(r"(?:\@|https?\://)\S+", "", str(text))  # removing mentions and urls
        text = text.lower()  # lowercase text
        text = re.sub('[0-9]+', '', text)
        text = REPLACE_BY_SPACE_RE.sub(" ", text)  # replace REPLACE_BY_SPACE_RE symbols with spaces
        text = BAD_SYMBOLS_RE.sub(" ", text)  # delete symbols which are in BAD_SYMBOLS_RE
        text = ' '.join([word for word in text.split() if word not in stop])  # delete stopwords
        text = text.strip()
    except Exception as e:
        log('----------Error in Text Processing ----------:{}'.format(e), 'error')
        raise e
    return text
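# Illustrative call (hypothetical input; assumes the NLTK stopword corpus has
# been fetched via nltk.download('stopwords') and `clf` is an instance of this
# class). Note the negation words survive while 'rt' and urls are stripped:
# clf.text_preprocess("RT @user: COVID-19 updates at https://example.com, NOT good!!")
# -> "covid updates not good"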
def tryUse(self):
    try:
        log('try')
        r = 10 / 0
        log(f'result = {r}')
    except ZeroDivisionError as e:
        log(f'except = {e}')
    finally:
        log('finally')
    log('end')
def TfidfVectorizer(self, X, y):
    try:
        # NOTE: this method shadows sklearn's TfidfVectorizer name; the bare
        # call below still resolves to the imported sklearn class, since
        # methods are not in the local scope.
        vec = TfidfVectorizer(strip_accents='unicode', ngram_range=(1, 2),
                              max_features=3000, smooth_idf=True, sublinear_tf=True)
        train_vec = vec.fit_transform(X[self.textcolumn])
        # 3000 tf-idf features + the 3 numeric columns = 3003, matching
        # input_dim in model()
        _train = np.hstack([X.drop(self.textcolumn, axis=1), train_vec.toarray()])
        y = LabelEncoder().fit_transform(y)
        scaler = Normalizer().fit(_train)
        _train = scaler.transform(_train)
    except Exception as e:
        log('----------Error in TfidfVectorizer ----------:{}'.format(e), 'error')
        raise e
    return _train, y
def result(self):
    try:
        data = self.read_data()
        X, y = self.data_processing(data)
        _train, y = self.TfidfVectorizer(X, y)
        _train, y = self.balancing(_train, y)
        self._train = _train
        self.label = y
        best_score, best_params = self.updating_hyperameters(self.model, _train, y)
        self.best_params = best_params
    except Exception as e:
        log('----------Error in Result ----------:{}'.format(e), 'error')
        raise e
    return self.optimized_model()
def read_file():
    f = None  # guard against NameError in finally if open() fails
    try:
        f = open('C:/Users/agoto/Desktop/log.txt')
        log(f.read())
    finally:
        if f:
            f.close()
    # The with statement calls close() for us automatically.
    # Mode 'r' reads text; to read binary files such as images or video, open with 'rb'.
    with open('C:/Users/agoto/Desktop/log.txt', 'r') as f:
        print(f.read(100))
        for line in f.readlines():
            print(line.strip())
    # The codecs module converts the encoding while reading, returning unicode directly.
    with codecs.open('C:/Users/agoto/Desktop/test.log', 'r', 'gbk') as f:
        print(f.read())
def read_data(self):
    try:
        data = pd.read_excel(self.dataset, parse_dates=[self.date])
        data[self.date] = pd.to_datetime(data[self.date], errors='coerce')
        # data[self.date] = data[self.date].dt.strftime('%Y/%m/%d')
        data[self.engagement] = data[self.engagement].astype(int)
        data[self.wordcount] = data[self.wordcount].astype(int)
        # source['Tweet_type'] = source['Tweet_type'].astype('category')
        print(data[[self.date, self.textcolumn, self.wordcount, self.engagement]])
        source = data[[self.date, self.textcolumn, self.wordcount, self.engagement]]
    except Exception as e:
        log('----------Error in Read data ----------:{}'.format(e), 'error')
        raise e
    return source
def model(self, kernel_initializer='glorot_uniform', activation='relu',
          dropout_rate=0.5, weight_constraint=0):
    try:
        # define the keras model
        model = Sequential()
        model.add(Dense(300, input_dim=3003, activation=activation,
                        kernel_initializer=kernel_initializer,
                        kernel_constraint=min_max_norm(min_value=1.0, max_value=1.0)))
        model.add(Dropout(dropout_rate))
        model.add(Dense(200, activation=activation, kernel_initializer=kernel_initializer,
                        kernel_constraint=min_max_norm(min_value=1.0, max_value=1.0)))
        model.add(Dropout(dropout_rate))
        model.add(Dense(100, activation=activation, kernel_initializer=kernel_initializer,
                        kernel_constraint=min_max_norm(min_value=1.0, max_value=1.0)))
        model.add(Dropout(dropout_rate))
        model.add(Dense(3, activation='softmax', kernel_initializer=kernel_initializer,
                        kernel_constraint=min_max_norm(min_value=1.0, max_value=1.0)))
        # compile the keras model
        # optimizer = SGD(lr=learn_rate, momentum=momentum)
        model.compile(loss='sparse_categorical_crossentropy', optimizer='adam',
                      metrics=['accuracy'])
    except Exception as e:
        log('----------Error in model function ----------:{}'.format(e), 'error')
        raise e
    return model
def updating_hyperameters(self, create_model=None, X=None, Y=None):
    try:
        model = KerasClassifier(build_fn=create_model, verbose=1)
        # define the grid search parameters
        batch_size = [10, 20, 40, 60, 80, 100]
        epochs = [10, 50, 100]
        # candidate values below are defined but not yet wired into param_grid
        optimizer = ['SGD', 'Adam']
        learn_rate = [0.001, 0.01, 0.1, 0.2, 0.3]
        momentum = [0.0, 0.2, 0.4, 0.6, 0.8, 0.9]
        # init_mode = ['uniform', 'glorot_uniform', 'glorot_normal', 'normal', 'zero']
        # activation = ['softmax', 'relu', 'tanh', 'sigmoid', 'linear']
        weight_constraint = [1, 2, 3, 4, 5]
        # dropout_rate = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
        param_grid = dict(batch_size=batch_size, epochs=epochs)
        grid = GridSearchCV(estimator=model, param_grid=param_grid, cv=3)
        grid_result = grid.fit(X, Y)
        best_score = grid_result.best_score_
        best_params = grid_result.best_params_
    except Exception as e:
        log('----------Error in updating Parameters ----------:{}'.format(e), 'error')
        raise e
    return best_score, best_params
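# Hedged usage sketch: the grid search returns the best mean CV accuracy and
# the winning batch_size/epochs combination (printed values are illustrative,
# not actual results):
# best_score, best_params = self.updating_hyperameters(self.model, _train, y)
# print("Best: %f using %s" % (best_score, best_params))
# -> Best: 0.71 using {'batch_size': 20, 'epochs': 50}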
def home():
    log("HOME")

    # Pip package utils
    num = add(1, 2)

    # DB
    log("GETTING FROM DB")
    output = "dummy_db_output"
    try:
        conn = db.connection
        cur = conn.cursor()
        cur.execute("select * from test limit 1;")
        output = cur.fetchall()
    except Exception as e:
        log("ERROR DB")
        log(e)

    # Proto
    log("CALLING MICROSERVICE")
    channel = grpc.insecure_channel('{}:50051'.format(GRPC_HOST))
    stub = microservice_pb2_grpc.BlogServiceStub(channel)
    response = stub.GetBlog(microservice_pb2.GetBlogRequest())

    # Kafka -- not working on Kubernetes, comment out for simplicity
    # log("SENDING TO KAFKA")
    # future = producer.send(TOPIC, {"test": "hello"}).add_callback(on_send_success).add_errback(on_send_error)
    # result = future.get(timeout=5)
    # log("Result = {}".format(result))

    return jsonify({
        "db": str(output),
        # "topic": result.topic,
        "lib": num,
        "microservice": str(response.blog)
    })
def get_all_url():
    full_list = []
    state_names = os.getenv('state')
    state_names = state_names.split(' ')
    for state in state_names:
        base_urls = get_state_url(state)
        pdf_urls = []
        rec_urls = []
        for start_url in base_urls:
            if "pdf" in start_url['url'].lower():
                pdf_urls.append(start_url)
            elif "corona" in start_url['url'].lower() or "covid" in start_url['url'].lower():
                if "cdc" in start_url['url'].lower():
                    full_list.append(start_url)
                else:
                    rec_urls.append(start_url)
            else:
                full_list.append(start_url)
        # log(len(rec_urls))
        for rec_url in rec_urls:
            urls = get_all_url_utils(rec_url)
            for url in urls:
                list_urls = get_all_url_utils(url)
                try:
                    for i in list_urls:
                        if i not in full_list:
                            # log("adding to list: " + i['url'])
                            full_list.append(i)
                except TypeError as e:
                    # was TypeError.with_traceback(), which raises on its own;
                    # log the caught exception instead
                    log('Whoops wrong content passed ' + str(e))
            # log(rec_url + " " + str(len(full_list)))
        for pdf_url in pdf_urls:
            full_list.append(pdf_url)
        log("Pdf count " + str(len(pdf_urls)))
        log(state + " " + str(len(full_list)))
    return full_list
def delete_index(index):
    es.indices.delete(index=index, ignore=[400, 404])
    log("Deleted")
def get_all_url_utils(url):
    # log("Requested url is: " + url['url'])
    home_page_url = url['url'].split('/')
    if len(home_page_url) > 1:
        home_page_url = home_page_url[0] + '//' + home_page_url[2]
    url_list = []
    try:
        resp = urllib.request.urlopen(url['url'])
        soup = BeautifulSoup(resp,
                             from_encoding=resp.info().get_param('charset'),
                             features="html.parser")
        for link in soup.find_all('a', href=True):
            if "corona" in link['href'].lower() or "covid" in link['href'].lower():
                if ignore_urls(link):
                    continue
                if link['href'].startswith('http'):
                    url_dict = {
                        'url': link['href'],
                        'county': url['county'],
                        'contentType': url['contentType'],
                        'channel': url['channel']
                    }
                else:
                    url_dict = {
                        'url': home_page_url + link['href'],
                        'county': url['county'],
                        'contentType': url['contentType'],
                        'channel': url['channel']
                    }
                url_list.append(url_dict)
    except (urllib.error.HTTPError, urllib.error.URLError, ValueError,
            ConnectionResetError, http.client.InvalidURL, UnboundLocalError,
            http.client.IncompleteRead, TypeError) as e:
        # the original had one near-identical except block per type;
        # consolidated with the exception type included in the log line
        log("{} for {}".format(type(e).__name__, url['url']))
    return url_list
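# Illustrative input/output for get_all_url_utils (hypothetical values): each
# entry is a dict carrying the url plus metadata copied onto every link found.
# get_all_url_utils({
#     'url': 'https://www.example-county.gov/covid',
#     'county': 'hennepin',
#     'contentType': 'provider',
#     'channel': 'url'
# })
# -> [{'url': 'https://www.example-county.gov/covid/faq',
#      'county': 'hennepin', 'contentType': 'provider', 'channel': 'url'}, ...]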
# import states

# For server
es = Elasticsearch([os.getenv('elastic_server_host')],
                   http_auth=(os.getenv('elastic_username'), os.getenv('elastic_password')),
                   scheme="https",
                   port=os.getenv('elastic_port'),
                   verify_certs=False)
# For Azure Cloud
# es = Elasticsearch(os.getenv('elastic_azure_host') + os.getenv('elastic_azure_port'),
#                    http_auth=(os.getenv('elastic_azure_username'), os.getenv('elastic_azure_password')))
log(es.info())
def on_send_success(metadata):
    log("SUCCESSFULLY SENT TO KAFKA")
def get_index_mapping(index):
    log(es.indices.get_mapping(index=index))
def create_index_util(index):
    request_body = {
        "settings": {
            "index": {
                "analysis": {
                    "analyzer": {
                        "analyzer_shingle": {
                            "tokenizer": "standard",
                            "filter": ["lowercase", "filter_shingle"]
                        }
                    },
                    "filter": {
                        "filter_shingle": {
                            "type": "shingle",
                            "max_shingle_size": 4,
                            "min_shingle_size": 2,
                            "output_unigrams": "true"
                        }
                    }
                }
            }
        },
        "mappings": {
            "properties": {
                "channel": {"type": "keyword"},
                "contentType": {"type": "keyword"},
                "contentdiff": {
                    "type": "text",
                    "fields": {
                        "keyword": {"type": "keyword", "ignore_above": 256}
                    }
                },
                "county": {"type": "keyword"},
                "createdatetime": {"type": "text"},
                "htmldiff": {"type": "text"},
                "state": {"type": "keyword"},
                "url": {"type": "text"},
                "title": {"type": "keyword"},
                "previewText": {
                    "type": "nested",
                    "properties": {
                        "title": {"type": "text"},
                        "description": {"type": "text"},
                        "image": {"type": "text"},
                        "website": {"type": "text"}
                    }
                }
            }
        }
    }
    log("Creating Index")
    es.indices.create(index=index, body=request_body)
    log("Index Created")
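# A minimal sketch of wiring the index helpers together (assumes the es client
# above and the check_index(index) helper used by create_index; the index name
# is hypothetical):
# index = "state_minnesota"
# delete_index(index)        # drop any stale copy (400/404 are ignored)
# create_index_util(index)   # recreate with the shingle analyzer mapping
# get_index_mapping(index)   # log the mapping to verify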
if __name__ == "__main__":
    urls = get_all_url()
    # url = {
    #     "url": "www.coronavirus.kdheks.gov",
    #     "county": "hennepin",
    #     "contentType": "provider",
    #     "channel": "url"
    # }
    # urls = get_all_url_utils(url)
    for url in urls:
        log(url)
def find_change(content):
    log("job started")
    # urls = [{"url": "https://www.denvergov.org/content/dam/denvergov/Portals/771/documents/covid-19/FaceCoveringRequired_site_11x17.pdf"}]
    urls = get_all_url()
    log("total urls to be crawled " + str(len(urls)))
    # the stdlib module is difflib (the source read "difffile")
    html_diff = difflib.HtmlDiff(tabsize=4, wrapcolumn=80)
    connection_error_urls = []
    could_not_retrieve_urls = []
    stop_iteration_urls = []
    new_urls = []
    for url in urls:
        pdf_content = False
        # log("going to " + url['url'])
        requests.packages.urllib3.util.ssl_.DEFAULT_CIPHERS += 'HIGH:!DH:!aNULL'
        try:
            requests.packages.urllib3.contrib.pyopenssl.DEFAULT_SSL_CIPHER_LIST += 'HIGH:!DH:!aNULL'
        except AttributeError:
            # no pyopenssl support used / needed / available
            pass
        try:
            req = requests.get(url['url'])
        except requests.exceptions.ConnectionError:
            connection_error_urls.append(url)
            continue
        except requests.exceptions.TooManyRedirects:
            log("Too many redirects for: " + url['url'])
            continue
        except requests.exceptions.ChunkedEncodingError:
            log("Chunked encoding error " + url['url'])
            continue
        except requests.exceptions.InvalidURL:
            log("requests exception invalid url " + url['url'])
            continue
        except requests.exceptions.MissingSchema:
            log("request exception missing schema: " + url['url'])
            continue
        except UnicodeError:
            log("unicode error for " + url['url'])
            continue
        except requests.exceptions.InvalidSchema:
            log("requests exceptions invalid schema " + url['url'])
            continue
        if req.status_code in [200]:
            try:
                if req.headers['Content-Type'].split(';')[0] == 'application/pdf':
                    pdf_content = True
                    # still keeping the variable name as html
                    html = read_from_url(url['url'])
                    if html == "failed":
                        log("pdf ocr failed for url - " + url['url'])
                        continue
                else:
                    html = req.text
            except KeyError:
                log("Key error for " + url['url'])
        else:
            could_not_retrieve_urls.append(url)
            html = None
            continue
        if url['url'] not in content.keys():
            content[url['url']] = html
            new_urls.append(url)
        else:
            old_html = content[url['url']]
            current_html = html
            content[url['url']] = current_html
            old_html_body = old_html
            current_html_body = current_html
            if not pdf_content:
                try:
                    current_html = BeautifulSoup(
                        current_html.encode('utf-8').decode('ascii', 'ignore'),
                        "html.parser")
                except UnboundLocalError:
                    log("unbound local error for " + url['url'])
                    continue
                except TypeError:
                    log("type error for " + url['url'])
                    continue
                [s.extract() for s in current_html.findAll(['script', 'style'])]
                try:
                    current_html_body = current_html.find('body').text
                except AttributeError:
                    log("attribute error for " + url['url'])
                    continue
                old_html = BeautifulSoup(
                    old_html.encode('utf-8').decode('ascii', 'ignore'),
                    "html.parser")
                [s.extract() for s in old_html.findAll(['script', 'style'])]
                if not old_html.find('body'):
                    continue
                old_html_body = old_html.find('body').text
            md5_1 = hashlib.md5()
            try:
                md5_1.update(old_html_body.encode('utf-8'))
            except AttributeError:
                log("Attribute error " + url['url'])
                continue
            hash_1 = md5_1.hexdigest()
            md5_2 = hashlib.md5()
            md5_2.update(current_html_body.encode('utf-8'))
            hash_2 = md5_2.hexdigest()
            if hash_1 != hash_2:
                if not pdf_content:
                    parsed_curr_html = BeautifulSoup(
                        html.encode('utf-8').decode('ascii', 'ignore'),
                        "html.parser")
                    title = ''
                    if parsed_curr_html.find('head'):
                        if parsed_curr_html.head.find('title'):
                            title = parsed_curr_html.head.find('title').text
                    try:
                        html_diffs = html_diff.make_file(
                            old_html_body.splitlines(),
                            current_html_body.splitlines(),
                            '<h2><a href= %s>%s</a></h2>' % ("\"" + url['url'] + "\"", title),
                            "",
                            context=True,
                            numlines=3)
                    except StopIteration:
                        log("stop iteration for " + url['url'])
                        stop_iteration_urls.append(url)
                        continue
                    except RecursionError:
                        log("recursion error for " + url['url'])
                        continue
                else:
                    title = url['url']
                    try:
                        html_diffs = html_diff.make_file(
                            old_html.splitlines(),
                            current_html.splitlines(),
                            '<h2><a href= %s>%s</a></h2>' % ("\"" + url['url'] + "\"", title),
                            "",
                            context=True,
                            numlines=3)
                    except StopIteration:
                        stop_iteration_urls.append(url)
                        continue
                diff_json = get_diff_json(html_diffs, url['url'])
                if len(diff_json) > 0:
                    title = ""
                    description = ""
                    image = ""
                    website = ""
                    if not pdf_content:
                        try:
                            dict_elem = link_preview.generate_dict(url['url'])
                        except urllib.error.HTTPError:
                            log("urllib error http error " + url['url'])
                            continue
                        except UnicodeEncodeError:
                            log("unicode encode error " + url['url'])
                            continue
                        except IndexError:
                            log("index error for " + url['url'])
                            continue
                        except http.client.InvalidURL:
                            log("http client invalid url " + url['url'])
                            continue
                        except UnicodeDecodeError:
                            log("unicode decode error " + url['url'])
                            continue
                        except urllib.error.URLError:
                            log("urllib error url error " + url['url'])
                            continue
                        except ConnectionResetError:
                            log("connection reset error " + url['url'])
                            continue
                        title = dict_elem['title']
                        description = dict_elem['description']
                        image = dict_elem['image']
                        website = dict_elem['website']
                    html_diffs += '<br><br>'
                    # ADD These
                    html_diffs += ('page link - <a href= %s>%s</a></h2>' %
                                   ("\"" + url['url'] + "\"", url['url']))
                    html_diffs += '<br><br>'
                    # log(html_diffs)
                    index = "state_" + os.getenv('state').lower().replace(' ', '_')
                    state = os.getenv('state')
                    page_url = url['url']
                    htmldiff = html_diffs
                    createdatetime = datetime.now()
                    county = url['county']
                    contentType = url['contentType']
                    channel = url['channel']
                    contentdiff = ""
                    preview = {
                        "title": title,
                        "description": description,
                        "image": image,
                        "website": website
                    }
                    # log(preview)
                    document = {
                        "state": state,
                        "channel": channel,
                        "contentType": contentType,
                        "contentdiff": contentdiff,
                        "county": county,
                        "createdatetime": createdatetime,
                        "htmldiff": htmldiff,
                        "url": page_url,
                        "title": title,
                        "previewText": preview
                    }
                    feed_documents(index, document)
                    for diff_doc in diff_json:
                        feed_nlp_document("nlp_data", diff_doc)
    log("job finished")
"contentType": contentType, "contentdiff": contentdiff, "county": county, "createdatetime": createdatetime, "htmldiff": htmldiff, "state": state, "url": page_url, "title": title, "previewText": preview } feed_documents(index, document) for json in diff_json: feed_nlp_document("nlp_data", json) log("job finished") log("crawler started") first_run = False find_change(content) first_run = True schedule.every().day.at("13:00").do(find_change, content) schedule.every().day.at("01:00").do(find_change, content) log("schedule started") while True: schedule.run_pending() time.sleep(1) # find_change(content) # find_change(content)
def get_state_url(state):
    # (query/payload construction for `url` and `payload` is truncated in the
    # source; the call below is reconstructed from the commented Azure variant)
    response = requests.post(url,
                             data=payload,
                             headers={'Content-Type': 'application/json'},
                             auth=(os.getenv('elastic_username'), os.getenv('elastic_password')),
                             verify=False).json()
    # for azure
    # response = requests.post(url,
    #                          data=payload,
    #                          headers={'Content-Type': 'application/json'},
    #                          auth=(os.getenv('elastic_azure_username'), os.getenv('elastic_azure_password')),
    #                          verify=False).json()
    # log(json.dumps(response, indent=2, sort_keys=True))
    hits = response["hits"]["hits"]
    source = hits[0]["_source"]
    state_urls = source["state_urls"]
    urls = []
    for state_url in state_urls:
        # urls.append(state_url["url"])
        if "facebook" in state_url["url"] or "twitter" in state_url["url"]:
            continue
        urls.append(state_url)
    # log(len(urls))
    return urls


if __name__ == "__main__":
    state_urls = get_state_url("pennsylvania")
    for state_url in state_urls:
        log(state_url)
def on_send_error(excp):
    log("ERROR AFTER SENT TO KAFKA: {}".format(excp))
def consumer_notify_success():
    log("SUCCESSFULLY CONSUMED BY CONSUMER")
    # If you trigger an emit in a regular function, it will default to a broadcast
    socketio.emit('isConsumerWorking', {'data': 'triggered consumer_notify_success'})
def search_index(index):
    results = es.search(body={"query": {"match_all": {}}}, index=index)
    for each in results['hits']['hits']:
        each = each['_source']['Url']
        log(each)
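# A hedged variant of search_index: the same search restricted by a field
# match instead of match_all (assumes documents carry a 'state' keyword field,
# per the mapping in create_index_util; function name is illustrative):
def search_index_by_state(index, state):
    body = {"query": {"term": {"state": state}}}
    results = es.search(body=body, index=index)
    for hit in results['hits']['hits']:
        log(hit['_source'])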