def __init__(self, tvs, season, episode):
    """Select a series, season and episode and load its English subtitles.

    The attributes ``subtlDir``, ``saveClstResult`` and
    ``dirFeaturesGoT`` / ``dirFeaturesBB`` are read from ``self`` before
    this method assigns anything to them, so they must already exist
    (presumably as class attributes -- confirm against the class body).

    Args:
        tvs: Series selector: 'G'/'g' for Game of Thrones, 'B'/'b' for
            Breaking Bad.
        season: Season number; rendered with ``str()``.
        episode: Episode number; must be accepted by ``int()``.

    Raises:
        ValueError: If ``tvs`` is not one of the supported selectors.
            (Previously this case fell through and crashed later with
            an UnboundLocalError on a half-initialised object.)
    """
    self.tvs = tvs
    self.season = season
    self.episode = episode

    if tvs in ('G', 'g'):
        self.subDir = self.dirFeaturesGoT
        self.subtlDir = self.subtlDir + 'GoT/English/GameOfThrones.'
        self.saveClstResult = self.saveClstResult + 'GoT_'
        seriesLabel = 'Tv-Series: Game of Thrones'
    elif tvs in ('B', 'b'):
        self.subDir = self.dirFeaturesBB
        self.subtlDir = self.subtlDir + 'BB/English/BreakingBad.'
        self.saveClstResult = self.saveClstResult + 'BB_'
        seriesLabel = 'Tv-Series: Breaking bad'
    else:
        print('you did not choose a TV-Series! bye bye')
        # Fail here, before any path attributes are derived from a
        # series that was never selected.
        raise ValueError(
            "unknown TV series selector %r (expected 'G' or 'B')" % (tvs,))

    # NOTE: the season is always prefixed with a single '0', so a season
    # >= 10 would yield e.g. 'Season010'; only the episode is padded
    # conditionally.
    self.Season = "Season0" + str(season)
    if int(episode) >= 10:
        self.Episode = "Episode" + str(episode)
    else:
        self.Episode = "Episode0" + str(episode)

    self.fileName = self.Season + '.' + self.Episode
    self.subtlFile = self.subtlDir + self.fileName + '.en.srt'
    self.saveClstResult = self.saveClstResult + self.Season + 'Result.ixt'

    print('You have chosen :')
    print(seriesLabel)
    print('Season: ', self.season)
    print('Episode:', self.episode)
    print('Subtile File : ', self.subtlFile)

    self.sub = sb.Subtitle(self.subtlFile)
def analyse(self, subtitleFile, wordFrequencyDistribution):
    """Walk an SRT-style subtitle file and feed each entry to the
    frequency-distribution update.

    The first line of the file is discarded. Each following entry is read
    as: a number line, a timing line containing ``-->``, then text lines
    up to the next blank line. A text line starting with ``-`` that
    follows earlier text of the same entry is treated as a second
    subtitle sharing the entry's number and timings.

    Every parsed subtitle is stored in the module-level ``nextSubtitle``
    (the very first one also in ``currentSubtitle``) before
    ``self.addToFrequencyDistributionWithFreq`` is called -- presumably
    that method reads these globals; confirm against its definition.

    Args:
        subtitleFile: Path of the subtitle file to read.
        wordFrequencyDistribution: Passed through unchanged to
            ``self.addToFrequencyDistributionWithFreq``.

    Returns:
        0 if the file could not be opened, otherwise None.

    Raises:
        ValueError: If the file ends right after a subtitle number, or a
            number line is not an integer.
    """
    global currentSubtitle
    global nextSubtitle

    try:
        subFile = open(subtitleFile)
    except (OSError, TypeError, ValueError):
        # Missing/unreadable file or an unusable path value.
        return 0

    # The context manager closes the file on every exit path, including
    # parse errors raised below.
    with subFile:
        # Get rid of number of lines in file symbol
        subFile.readline()
        firstRun = True

        while True:
            subNumber = subFile.readline().strip()
            if not subNumber:
                # Blank line or end of file: no further entries.
                break

            timeSub = subFile.readline()
            if not timeSub:
                # readline() returns '' (never None) at end of file.
                raise ValueError('Error parsing time subtitle')

            textParts = []
            newSubtitleFound = False
            nextSubtitleText = ''
            while True:
                line = subFile.readline()
                if not line.strip():
                    break
                if line.strip().startswith('-') and textParts:
                    # Second speaker: keep only the latest such line.
                    newSubtitleFound = True
                    nextSubtitleText = line
                else:
                    textParts.append(line + ' ')
            subtitleString = ''.join(textParts)

            timeParts = timeSub.split('-->')
            startTime = int(self.parse(timeParts[0]))
            stopTime = int(self.parse(timeParts[1]))
            number = int(subNumber)

            nextSubtitle = Subtitle.Subtitle(
                number, startTime, stopTime, subtitleString)
            if firstRun:
                # Nothing to compare against yet; just seed the pair.
                firstRun = False
                currentSubtitle = nextSubtitle
            else:
                self.addToFrequencyDistributionWithFreq(
                    wordFrequencyDistribution)

            if newSubtitleFound:
                nextSubtitle = Subtitle.Subtitle(
                    number, startTime, stopTime, nextSubtitleText)
                self.addToFrequencyDistributionWithFreq(
                    wordFrequencyDistribution)
def analyse(self, subtitleFile):
    """Walk an SRT-style subtitle file and run the subtitle check on
    each parsed entry.

    Entries are read as: a number line (digits only), a timing line
    containing ``-->``, then text lines up to the next blank line. A text
    line starting with ``-`` that follows earlier text of the same entry
    is treated as a second subtitle sharing the entry's number and
    timings. Reading stops at the first number line that is not purely
    digits (including end of file).

    Every parsed subtitle is stored in the module-level ``nextSubtitle``
    (the very first one also in ``currentSubtitle``) before
    ``self.checkSubtitlesAreOk`` is called -- presumably that method
    reads these globals; confirm against its definition.

    Errors are reported with ``print`` rather than raised: a failure to
    open the file ends the call, a failure inside one entry skips to the
    next read.

    Args:
        subtitleFile: Path of the subtitle file to read.

    Returns:
        None.
    """
    global currentSubtitle
    global nextSubtitle
    global mostCommonWords
    global responseGroupId

    print(responseGroupId)

    try:
        print('Attempting to open ' + subtitleFile)
        subFile = open(subtitleFile, 'r')
    except Exception:
        # Best-effort: report and stop. Without this return the loop
        # below would hit an unbound ``subFile`` on every iteration and
        # print the resulting error forever.
        print('Could not open, wrong encoding perhaps?')
        return

    firstRun = True
    # The context manager closes the file on every exit path.
    with subFile:
        while True:
            try:
                subNumber = subFile.readline().strip()
                # ''.isdigit() is False, so this also covers blank
                # lines and end of file.
                if not subNumber.isdigit():
                    break

                try:
                    timeSub = subFile.readline()
                    if not timeSub:
                        # readline() returns '' (never None) at EOF.
                        raise ValueError('Error parsing time subtitle')

                    textParts = []
                    newSubtitleFound = False
                    nextSubtitleText = ''
                    while True:
                        line = subFile.readline()
                        if not line.strip():
                            break
                        if line.strip().startswith('-') and textParts:
                            # Second speaker: keep only the latest line.
                            newSubtitleFound = True
                            nextSubtitleText = line
                        else:
                            textParts.append(line + ' ')
                    subtitleString = ''.join(textParts)

                    timeParts = timeSub.split('-->')
                    startTime = int(self.parse(timeParts[0]))
                    stopTime = int(self.parse(timeParts[1]))
                    number = int(subNumber)

                    nextSubtitle = Subtitle.Subtitle(
                        number, startTime, stopTime, subtitleString)
                    if firstRun:
                        # Nothing to compare against yet; seed the pair.
                        firstRun = False
                        currentSubtitle = nextSubtitle
                    else:
                        self.checkSubtitlesAreOk()

                    if newSubtitleFound:
                        nextSubtitle = Subtitle.Subtitle(
                            number, startTime, stopTime, nextSubtitleText)
                        self.checkSubtitlesAreOk()
                except Exception as e:
                    print('Could not parse sub, exception was ' + str(e))
            except Exception as e:
                # Only read errors on the number line reach this handler.
                print('Could not parse sub file, exception was ' + str(e))