def check(path):
    """Scan the numbered ``Content<i>.json`` files in *path* for errors.

    A file is reported when its parsed JSON content contains the key
    ``"error"``. Progress is reported through ``print_progress`` and a
    summary is printed once all files have been inspected.

    Args:
        path: Directory holding the JSON files. A trailing ``/`` is
            appended when it is missing.
    """
    if path[-1] != "/":
        path += "/"
    # Single-argument print calls render identically on Python 2 and 3.
    print("Checking JSON files in %s for errors" % path)
    # Only the first (top-level) entry produced by os.walk is needed.
    path, dirs, files = next(os.walk(path))
    # NOTE(review): the -1 presumably accounts for one non-content file in
    # the directory -- confirm against the exporter that writes these files.
    total_files = len(files) - 1
    errors = []
    for i in range(total_files):
        # The context manager closes each file even when parsing fails.
        with open(path + "Content" + str(i) + ".json", "r") as handle:
            content = json.load(handle)
        if "error" in content:
            errors.append("Content " + str(i))
        print_progress(i, total_files, prefix='Progress:', suffix='Complete')
    print("\nChecking Completed")
    if not errors:
        print("No Errors Found!")
    else:
        print("Errors found in:")
        for name in errors:
            print(name)
def handle(self, *args, **options):
    """Create ``StrongNr`` rows for the Greek verses that carry Strong markup.

    Loads every ``BibleText`` of translation ``GNTTR`` whose text contains
    ``<gr rmac=``, extracts ``(grammar, strong number, greek word)`` triples
    with a regular expression and saves one ``StrongNr`` per triple. The
    transliteration is read from ``./strongsgreek.xml``. Errors raised while
    saving are written to ``self.stdout`` and do not abort the run.
    """
    greekStrongVerses = BibleText.objects.filter(
        versText__icontains='<gr rmac=',
        translationIdentifier=BibleTranslation.objects.filter(
            identifier='GNTTR'))
    sgreek = ElementTree.parse("./strongsgreek.xml").getroot()
    entries = sgreek.findall(".//entries/entry")

    # Create a dictionary of strong numbers
    strongdict = {}
    for onegreek in entries:
        strongdict[int(onegreek.get('strongs'))] = onegreek

    # Compiled once; the pattern does not depend on the verse.
    # NOTE(review): the leading greedy "^.*" means at most one match per
    # line of verse text is returned (the last one) -- confirm this is
    # intended when a verse holds several <gr> elements on one line.
    pattern = re.compile(
        r'^.*rmac="([^"]*)" str="([^"]*)">([^<]*)<', re.MULTILINE)

    # TODO: look up the matching word in another translation (e.g.
    # ELB1905STR) and handle multiple strong numbers in one verse.

    # Counted once up front instead of issuing a COUNT query per verse.
    total = greekStrongVerses.count()
    count = 0
    for vers in greekStrongVerses:
        found = pattern.findall(vers.versText)
        bvers = None
        if found:
            # The target verse is the same for every match of this text.
            candidates = BibleVers.objects.filter(
                bookNr=vers.vers.bookNr,
                versNr=vers.vers.versNr,
                chapterNr=vers.vers.chapterNr)
            if candidates.count() > 0:
                bvers = candidates[0]
        if bvers is not None:
            for grammar, number_text, greek in found:
                number = int(number_text)
                entry = strongdict.get(number)
                if entry is None:
                    # Strong number unknown to the lexicon: nothing to store.
                    continue
                translit = entry.find('./greek').get('translit')
                strong = StrongNr(
                    pronounciation=translit,
                    strongNr=number,
                    grammar=grammar,
                    translationIdentifier=vers.translationIdentifier,
                    greek=greek,
                    vers=bvers)
                try:
                    strong.save()
                except Exception as exc:
                    # Best effort: report the failure and keep importing.
                    self.stdout.write(str(exc))
        count += 1
        print_progress(count, total)
def likes():
    """Count the like entries per user across the exported like files.

    Reads ``json/likes/content<i>.json`` for ``i`` in ``range(file_count)``,
    where ``file_count`` is the number of entries in ``json/likes/`` minus
    one, and tallies the ``"name"`` of every item in each file's ``"data"``
    list.

    Returns:
        dict mapping each name to the number of entries carrying it.
    """
    likers = {}
    path = "json/likes/"
    file_count = len(glob.glob('json/likes/*')) - 1
    for i in range(file_count):
        # The context manager closes the file even when parsing fails.
        with open(path + "content" + str(i) + ".json", "r") as handle:
            payload = json.load(handle)
        print_progress(i, file_count, prefix='Progress:', suffix='Complete')
        for entry in payload["data"]:
            poster = entry["name"]
            likers[poster] = likers.get(poster, 0) + 1
    return likers
def handle(self, *args, **options):
    """Import the German bible book names from ``./bibleBooks_de.txt``.

    Every line of the file is split on commas into: full name, short name
    and any number of alternative names. The 1-based line number is used as
    the book number. An existing ``BibleBook`` with that number and language
    ``'de'`` is updated, otherwise a new one is created. Lines with fewer
    than two fields are skipped but still consume a book number.
    """
    # The context manager closes the input file even if a save fails.
    with open('./bibleBooks_de.txt', 'r') as f:
        for bookNr, line in enumerate(f, 1):
            ele = line.split(',')
            if len(ele) >= 2:
                ele = [x.strip() for x in ele]
                existing = BibleBook.objects.filter(nr=bookNr, language='de')
                if existing.count() > 0:
                    book = existing[0]
                else:
                    book = BibleBook()
                book.nr = bookNr
                book.language = 'de'
                book.name = ele[0]
                # At least two fields are guaranteed by the check above.
                book.short_name = ele[1]
                if len(ele) > 2:
                    # Stored with leading and trailing commas around the list.
                    book.alternativeNames = ',' + ','.join(ele[2:]) + ','
                book.save()
            # NOTE(review): 66 is presumably the expected number of books in
            # the input file -- confirm before reusing with other files.
            print_progress(bookNr, 66)
def comment():
    """Collect comment texts and per-user comment counts from the export.

    Reads ``json/comments/content<i>.json`` for ``i`` in
    ``range(file_count)``, where ``file_count`` is the number of entries in
    ``json/comments/`` minus one.

    Returns:
        A tuple ``(allcomments, commenters)``: ``allcomments`` is every
        comment message lower-cased and followed by a single space, joined
        in reading order; ``commenters`` maps each author name to the number
        of comments written.
    """
    pieces = []
    commenters = {}
    path = "json/comments/"
    file_count = len(glob.glob('json/comments/*')) - 1
    for i in range(file_count):
        # The context manager closes the file even when parsing fails.
        with open(path + "content" + str(i) + ".json", "r") as handle:
            payload = json.load(handle)
        print_progress(i, file_count, prefix='Progress:', suffix='Complete')
        for com in payload["data"]:
            poster = com["from"]["name"]
            commenters[poster] = commenters.get(poster, 0) + 1
            pieces.append(com["message"].lower() + " ")
    # Joined once at the end instead of growing a string inside the loop.
    allcomments = "".join(pieces)
    return allcomments, commenters
def handle(self, *args, **options):
    """Load German bible book names from ``./bibleBooks_de.txt`` into the DB.

    Each line is a comma-separated record: full name, short name, then
    optional alternative names. The 1-based line number serves as the book
    number. A ``BibleBook`` with that number and language ``'de'`` is updated
    when it exists and created otherwise. Lines with fewer than two fields
    are skipped, but their line number is still consumed.
    """
    # Opened in a context manager so the file is closed on every exit path.
    with open('./bibleBooks_de.txt', 'r') as f:
        for bookNr, line in enumerate(f, 1):
            ele = line.split(',')
            if len(ele) >= 2:
                ele = [x.strip() for x in ele]
                matches = BibleBook.objects.filter(nr=bookNr, language='de')
                if matches.count() > 0:
                    book = matches[0]
                else:
                    book = BibleBook()
                book.nr = bookNr
                book.language = 'de'
                book.name = ele[0]
                # The enclosing check guarantees a second field exists.
                book.short_name = ele[1]
                if len(ele) > 2:
                    # Alternative names are wrapped in commas on both sides.
                    book.alternativeNames = ',' + ','.join(ele[2:]) + ','
                book.save()
            # NOTE(review): 66 is presumably the expected number of books in
            # the input file -- confirm before reusing with other files.
            print_progress(bookNr, 66)
def post():
    """Aggregate statistics over the exported post files in ``json/posts/``.

    Reads ``json/posts/content<i>.json`` for ``i`` in ``range(file_count)``,
    where ``file_count`` is the number of entries in ``json/posts/`` minus
    one. Every post id is also written, one per line, to the file
    ``post_ids`` in the current working directory.

    Returns:
        A 5-tuple ``(top_liked, top_commented, most_posted, all_info,
        message_words)``:

        * ``top_liked`` / ``top_commented``: dicts of at most 10 entries
          mapping a post URL to its like / comment count.
        * ``most_posted``: dict mapping author name to number of posts.
        * ``all_info``: list of ``[id, likes, comments, created_time]``.
        * ``message_words``: every post message lower-cased, each preceded
          by a single space, concatenated in reading order.

    Raises:
        ValueError: if a post's ``created_time`` does not match
            ``%Y-%m-%dT%H:%M:%S+0000``.
    """
    permalink_base = (
        "https://www.facebook.com/groups/1717731545171536/permalink/")

    top_liked = {}
    top_commented = {}
    most_posted = {}
    all_info = []
    message_parts = []

    def update_top(ranking, key, value, top=10):
        """Keep *ranking* limited to the *top* highest-valued entries."""
        if len(ranking) < top:
            ranking[key] = value
            return
        lowest_key, lowest_value = min(
            ranking.items(), key=lambda item: item[1])
        if value > lowest_value:
            del ranking[lowest_key]
            ranking[key] = value

    # The id record is closed even if reading a post file fails midway.
    with open('post_ids', 'w') as record:
        file_count = len(glob.glob('json/posts/*')) - 1
        for i in range(file_count):
            with open("json/posts/content" + str(i) + ".json", "r") as handle:
                payload = json.load(handle)
            print_progress(i, file_count - 1,
                           prefix='Progress:', suffix='Complete')

            for entry in payload["data"]:
                post_id = entry["id"]
                # Prefer the URL of the first attachment; otherwise link to
                # the post itself using the id with its first 17 characters
                # removed.
                url = permalink_base + post_id[17:]
                if "attachments" in entry:
                    first_attachment = entry["attachments"]["data"][0]
                    if "url" in first_attachment:
                        url = first_attachment["url"]

                like_count = int(entry["likes"]["summary"]["total_count"])
                comment_count = int(
                    entry["comments"]["summary"]["total_count"])
                created = str(entry["created_time"])
                # The parsed value is not used; the call is kept so that a
                # malformed timestamp still raises ValueError as before.
                datetime.datetime.strptime(created, "%Y-%m-%dT%H:%M:%S+0000")
                poster = entry["from"]["name"]

                record.write(post_id + "\n")

                update_top(top_liked, url, like_count)
                update_top(top_commented, url, comment_count)
                most_posted[poster] = most_posted.get(poster, 0) + 1

                all_info.append([post_id, like_count, comment_count, created])

                if "message" in entry:
                    message_parts.append(" " + entry["message"].lower())

    # Joined once at the end instead of growing a string inside the loop.
    message_words = "".join(message_parts)
    return top_liked, top_commented, most_posted, all_info, message_words
def insert_osis_bible(self, xmltree, title, identifier=None, lang='GER'):
    """Insert every verse of an OSIS bible document into the database.

    Args:
        xmltree: Parsed XML tree of the OSIS document (must offer
            ``getroot()``).
        title: Name of the translation. Nothing is imported when ``None``.
        identifier: Identifier of the translation; defaults to *title* with
            all spaces removed.
        lang: Language stored on a newly created translation. Nothing is
            imported when ``None``.

    Raises:
        ValueError: if a verse references a book name that is not part of
            the known OSIS book abbreviations.
    """
    def _insert_verse(translation, book, chapter, vers, text):
        """Insert one verse text, creating BibleVers/BibleText when missing.

        ``translation`` is a BibleTranslation instance, ``book`` a BibleBook
        instance, ``chapter`` and ``vers`` are integers and ``text`` is a
        string.
        """
        # A verse number can stand for a range of verses. In NGUE the two
        # ends are joined by "8209" (e.g. 16-17 arrives as 16820917). In
        # that case the first verse receives the text and every following
        # verse of the range is inserted as an empty verse.
        numverses = 1
        if '8209' in str(vers):
            bounds = str(vers).split('8209')
            vers, lastvers = int(bounds[0]), int(bounds[1])
            numverses = lastvers - vers + 1

        # Does this verse already exist?
        v = BibleVers.objects.filter(bookNr=book, chapterNr=chapter,
                                     versNr=vers)
        if v.count() <= 0:
            v = BibleVers(bookNr=book, chapterNr=chapter, versNr=vers)
            v.save()
        else:
            v = v[0]

        # Insert the text only if it does not already exist.
        t = BibleText.objects.filter(vers=v,
                                     translationIdentifier=translation)
        if t.count() <= 0:
            t = BibleText(vers=v, translationIdentifier=translation,
                          versText=text)
            t.save()

        for offset in range(1, numverses):
            _insert_verse(translation, book, chapter, vers + offset, '')

    BOOKS = [
        'Gen', 'Exod', 'Lev', 'Num', 'Deut', 'Josh', 'Judg', 'Ruth', '1Sam',
        '2Sam', '1Kgs', '2Kgs', '1Chr', '2Chr', 'Ezra', 'Neh', 'Esth', 'Job',
        'Ps', 'Prov', 'Eccl', 'Song', 'Isa', 'Jer', 'Lam', 'Ezek', 'Dan',
        'Hos', 'Joel', 'Amos', 'Obad', 'Jonah', 'Mic', 'Nah', 'Hab', 'Zeph',
        'Hag', 'Zech', 'Mal', 'Matt', 'Mark', 'Luke', 'John', 'Acts', 'Rom',
        '1Cor', '2Cor', 'Gal', 'Eph', 'Phil', 'Col', '1Thess', '2Thess',
        '1Tim', '2Tim', 'Titus', 'Phlm', 'Heb', 'Jas', '1Pet', '2Pet',
        '1John', '2John', '3John', 'Jude', 'Rev'
    ]
    osis_ns = '{http://www.bibletechnologies.net/2003/OSIS/namespace}'

    # Guard first: deriving the identifier from a missing title used to
    # raise AttributeError before this check could ever be reached.
    if title is None or lang is None:
        return
    if identifier is None:
        identifier = title.replace(' ', '')
    root = xmltree.getroot()

    # Ask if this translation does already exist
    tr = BibleTranslation.objects.filter(identifier=identifier)
    if tr.count() <= 0:
        tr = BibleTranslation(identifier=identifier, name=title,
                              language=lang)
        tr.save()
        self.stdout.write(' -> created new translation ' + identifier + '.')
    else:
        tr = tr[0]

    # Iterate over all verses, chapter by chapter.
    chapters = root.findall('.//' + osis_ns + 'chapter')
    actbook = ''
    actchapter = 0
    overallchaptercount = 0
    tb = None
    for chapter in chapters:
        versesinchapter = chapter.findall('.//' + osis_ns + 'verse')
        for vers in versesinchapter:
            parts = vers.attrib.get('osisID').split('.')
            bookname = parts[0]
            cnumber = int(parts[1])
            vnumber = int(parts[2])
            text = self.element_to_string(vers)

            if bookname != actbook:
                # Does this book already exist?
                bindex = BOOKS.index(bookname)
                tb = BibleBook.objects.filter(nr=bindex + 1)
                if tb.count() <= 0:
                    tb = BibleBook(nr=bindex + 1, name='',
                                   alternativeNames='')
                    tb.save()
                else:
                    tb = tb[0]
                actbook = bookname
                # A new book always starts a new chapter, even when its
                # chapter number equals the previous book's last chapter
                # (e.g. consecutive single-chapter books). Without this
                # reset the first-verse recovery below would be skipped.
                actchapter = 0

            # Check for existence of the first verse in this chapter,
            # because in Schlachter 2000 the first verse is not wrapped in
            # a verse tag.
            if cnumber != actchapter:
                if vnumber > 1:
                    # The first verse is taken from the text of the parent
                    # chapter element.
                    _insert_verse(
                        tr, tb, cnumber, 1,
                        self.element_to_string(
                            chapter,
                            [osis_ns + 'div', osis_ns + 'verse']))
                actchapter = cnumber

            _insert_verse(tr, tb, cnumber, vnumber, text)

        overallchaptercount += 1
        sys.stdout.write('Insert book %s ...' % actbook)
        print_progress(overallchaptercount, len(chapters))
def insert_zefania_xml(self, xmltree):
    """Insert a bible given as a Zefania XML tree into the database.

    The translation's identifier, language and title are read from the
    ``INFORMATION`` element. The translation, every ``BIBLEBOOK``, every
    verse and every verse text are created only when no matching row exists
    yet. Progress is reported through ``print_progress`` after each chapter.

    Args:
        xmltree: Parsed XML tree of the Zefania document (must offer
            ``getroot()``).
    """
    def store_chapter(book_row, chapter_node, translation):
        """Store all ``VERS`` children of one chapter; return their count."""
        stored = 0
        for stored, verse_node in enumerate(chapter_node.findall("VERS"), 1):
            # Does this verse and chapter already exist?
            verse_rows = BibleVers.objects.filter(
                bookNr=book_row,
                chapterNr=chapter_node.get('cnumber'),
                versNr=verse_node.get('vnumber'))
            if verse_rows.count() > 0:
                verse_row = verse_rows[0]
            else:
                verse_row = BibleVers(
                    bookNr=book_row,
                    chapterNr=chapter_node.get('cnumber'),
                    versNr=verse_node.get('vnumber'))
                verse_row.save()

            # Insert the text only if it does not already exist.
            text_rows = BibleText.objects.filter(
                translationIdentifier=translation, vers=verse_row)
            if text_rows.count() <= 0:
                BibleText(
                    translationIdentifier=translation,
                    vers=verse_row,
                    versText=self.element_to_string(verse_node)).save()
        return stored

    root = xmltree.getroot()
    identifier = root.findtext('INFORMATION/identifier')
    language = root.findtext('INFORMATION/language')
    title = root.findtext('INFORMATION/title')

    # Reuse the translation when it is already known, else create it.
    known = BibleTranslation.objects.filter(identifier=identifier)
    if known.count() > 0:
        translation = known[0]
    else:
        translation = BibleTranslation(identifier=identifier, name=title,
                                       language=language)
        translation.save()
        self.stdout.write(' -> created new translation ' + identifier + '.')

    # Insert the verses book by book.
    books = root.findall('BIBLEBOOK')
    for books_done, book_node in enumerate(books):
        # Does this book already exist?
        book_rows = BibleBook.objects.filter(nr=book_node.get('bnumber'))
        if book_rows.count() > 0:
            book_row = book_rows[0]
        else:
            book_row = BibleBook(nr=int(book_node.get('bnumber')),
                                 name='', alternativeNames='')
            book_row.save()

        chapters = book_node.findall('CHAPTER')
        for chapters_done, chapter_node in enumerate(chapters, 1):
            store_chapter(book_row, chapter_node, translation)
            # Fraction of the current book already processed, added to the
            # number of fully processed books.
            fraction = 1.0 / len(chapters) * chapters_done
            print_progress(books_done + fraction, len(books))