def main():
    """Download a Debian British-English word list and unpack it into ``data/``.

    Makes sure a ``data`` directory exists, downloads the selected ``.deb``
    package from the Debian mirror to ``data/wordlist.deb``, then copies the
    dictionary file stored in the package's ``data.tar.gz`` member to
    ``data/wordlist.txt``.

    Returns:
        0 on success, 1 if ``data`` exists but is not a directory.

    Raises:
        IndexError: if the downloaded archive has no ``data.tar.gz`` member.
    """
    # Local import, in keeping with the lazy imports further down.
    from contextlib import closing

    if os.path.exists('data') and not os.path.isdir('data'):
        # Single-argument parenthesised form prints identically whether
        # ``print`` is a statement or a function.
        print("data is not a directory.")
        return 1
    elif not os.path.exists('data'):
        # Create the directory if it doesn't already exist.
        os.mkdir('data')

    mirror = 'http://http.us.debian.org/debian'

    dictionaries = {
        'wbritish-small': {
            'download': '/pool/main/s/scowl/wbritish-small_7.1-1_all.deb',
            'filename': 'british-english-small',
        },
        'wbritish-large': {
            'download': '/pool/main/s/scowl/wbritish-large_7.1-1_all.deb',
            'filename': 'british-english-large',
        },
        'wbritish-huge': {
            'download': '/pool/main/s/scowl/wbritish-huge_7.1-1_all.deb',
            'filename': 'british-english-huge',
        },
        # This possibly contains invalid words (as well as ones that are
        # very uncommon).
        'wbritish-insane': {
            'download': '/pool/main/s/scowl/wbritish-insane_7.1-1_all.deb',
            'filename': 'british-english-insane',
        },
    }

    # Choose which dictionary to use
    dictionary = dictionaries['wbritish-insane']
    dictionary_uri = mirror + dictionary['download']

    try:
        import debian
    except ImportError:
        # NOTE(review): importDebian() is defined elsewhere; presumably it
        # makes the ``debian`` package importable -- confirm.
        debian = importDebian()
    from debian.arfile import ArFile

    deb_file = 'data/wordlist.deb'
    print("Downloading " + dictionary_uri)
    # closing() guarantees the HTTP response is released even when the copy
    # fails part-way through.
    with closing(urllib2.urlopen(dictionary_uri)) as response:
        with open(deb_file, 'wb') as fw:
            shutil.copyfileobj(response, fw)

    ar = ArFile(deb_file)
    # Indexing [0] raises IndexError when the package carries no
    # ``data.tar.gz`` member.
    data = [m for m in ar.members if 'data.tar.gz' == m.name][0]
    with tarfile.open(fileobj=data, mode="r:gz") as tar:
        wordlist = tar.extractfile(
            './usr/share/dict/' + dictionary['filename'])
        with open('data/wordlist.txt', 'w') as fw:
            shutil.copyfileobj(wordlist, fw)

    return 0
def compare_deb_files(path1, path2, source=None):
    """Compare two ``.deb`` archives and collect their differences.

    Every ar member whose name appears in both archives is extracted into a
    temporary directory and compared with
    ``debbindiff.comparators.compare_files``. The archive listings returned
    by ``get_ar_content`` are then compared as "metadata".

    Args:
        path1: path to the first ``.deb`` file.
        path2: path to the second ``.deb`` file.
        source: accepted for interface compatibility; not used here.

    Returns:
        A list holding the per-member comparison results followed, when
        ``Difference.from_unicode`` yields a truthy value, by the metadata
        difference.
    """
    differences = []
    # look up differences in content
    ar1 = ArFile(filename=path1)
    ar2 = ArFile(filename=path2)
    with make_temp_directory() as temp_dir1:
        with make_temp_directory() as temp_dir2:
            names1 = ar1.getnames()
            names2 = ar2.getnames()
            logger.debug('content1 %s', names1)
            logger.debug('content2 %s', names2)
            # Only members present in both archives are compared, in sorted
            # name order.
            for name in sorted(set(names1).intersection(names2)):
                logger.debug('extract member %s', name)
                member1 = ar1.getmember(name)
                member2 = ar2.getmember(name)
                in_path1 = os.path.join(temp_dir1, name)
                in_path2 = os.path.join(temp_dir2, name)
                # Members are raw archive payloads: write them through
                # binary handles so no newline translation or text encoding
                # is applied to the bytes.
                with open(in_path1, 'wb') as f1:
                    f1.write(member1.read())
                with open(in_path2, 'wb') as f2:
                    f2.write(member2.read())
                differences.extend(
                    debbindiff.comparators.compare_files(
                        in_path1, in_path2, source=name))
                os.unlink(in_path1)
                os.unlink(in_path2)
    # look up differences in file list and file metadata
    content1 = get_ar_content(path1)
    content2 = get_ar_content(path2)
    difference = Difference.from_unicode(
        content1, content2, path1, path2, source="metadata")
    if difference:
        differences.append(difference)
    return differences