def deflat(a_dict):
    """Flatten a mapping into a list of per-entry record dicts.

    Each key/value pair of ``a_dict`` becomes one dict with three string
    fields, inserted in this order:

    * ``"source"`` -- always the literal ``'facebook_profile'``
    * ``"term"``   -- the value, stringified and passed through ``c.cleanText``
    * ``"key"``    -- the key, stringified and passed through ``c.cleanText``

    Args:
        a_dict: Mapping whose items are converted; keys and values may be of
            any type since both are passed through ``str()`` first.

    Returns:
        A list with one record dict per item, in the mapping's iteration
        order. An empty mapping yields an empty list.
    """
    # NOTE(review): c.cleanText is defined outside this block; its result is
    # wrapped in str() again here, so it presumably may return a non-str.
    return [
        {
            "source": 'facebook_profile',
            # The value is cleaned before the key, as in the original order.
            "term": str(c.cleanText(str(value))),
            "key": str(c.cleanText(str(name))),
        }
        for name, value in a_dict.items()
    ]
def bulkJsonData(json_file, _index, whatStuff):
    """Yield one bulk-index action dict per JSON document in ``json_file``.

    Every entry returned by ``c.getDataFromFile(json_file)`` that does not
    contain the marker ``'{"index"'`` is parsed as JSON, its
    ``like.fullText`` field is replaced by ``c.cleanText`` of that text, and
    two fields are added: ``load_type`` (set to ``whatStuff``) and
    ``source_type`` (always ``"twitter"``).

    Args:
        json_file: Passed unchanged to ``c.getDataFromFile``.
        _index: Value stored under ``"_index"`` in every yielded action.
        whatStuff: Value stored as the document's ``load_type``.

    Yields:
        Dicts of the form ``{"_index": _index, "_id": <uuid.UUID>,
        "_source": <JSON string>}``, with a fresh random UUID per document.

    Raises:
        KeyError: If a parsed document lacks ``like`` or ``like.fullText``.
        json.JSONDecodeError: If a non-skipped entry is not valid JSON.
    """
    # Results are yielded one at a time rather than collected in a list.
    # NOTE(review): whether the input itself is streamed depends on
    # c.getDataFromFile, which is defined outside this block.
    for doc in c.getDataFromFile(json_file):
        # Entries carrying an '{"index"' marker are skipped
        # (presumably bulk action/metadata lines -- confirm with the data).
        if '{"index"' in doc:
            continue

        json_doc = json.loads(doc)

        like = json_doc["like"]
        like["fullText"] = c.cleanText(like["fullText"])

        # load_type is used later for filtering.
        json_doc["load_type"] = whatStuff
        json_doc["source_type"] = "twitter"

        # Serialize with json.dumps rather than str(dict) plus quote
        # swapping: the latter corrupts text containing apostrophes or
        # quotes and emits True/False/None instead of true/false/null.
        # ensure_ascii=False keeps non-ASCII characters unescaped, as the
        # previous str()-based output did.
        new_doc = json.dumps(json_doc, ensure_ascii=False)

        yield {"_index": _index, "_id": uuid.uuid4(), "_source": new_doc}