def traverse(self, source_path, patch):
    ''' Traverse source files and match them against the loaded patches '''
    print '[+] traversing source files'
    start_time = time.time()
    self._patch_list = patch.items()
    self._npatch = patch.length()

    if os.path.isfile(source_path):
        magic_type = common.magic_cookie.file(source_path)
        common.verbose_print(' [-] %s: %s' % (source_path, magic_type))
        if magic_type.startswith('text'):
            main_type, sub_type = magic_type.split('/')
            magic_ext = self._get_file_type(sub_type)
            self._process(source_path, magic_ext)
    elif os.path.isdir(source_path):
        for root, dirs, files in os.walk(source_path):
            for file_name in files:
                file_path = os.path.join(root, file_name)
                magic_type = common.magic_cookie.file(file_path)
                common.verbose_print(' [-] %s: %s' % (file_path, magic_type))
                if magic_type.startswith('text'):
                    main_type, sub_type = magic_type.split('/')
                    magic_ext = self._get_file_type(sub_type)
                    self._process(file_path, magic_ext)

    elapsed_time = time.time() - start_time
    print '[+] %d possible matches ... %.1fs\n' % (self._nmatch, elapsed_time)
    return self._nmatch
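
# A sketch of the MIME gate used by traverse(): only files whose libmagic
# type starts with 'text' ever reach _process(). The helper below is
# hypothetical (not part of this class) and uses the pip `python-magic`
# API; the ctypes binding variant (magic.open/load) is the one installed
# as common.magic_cookie in __main__.
def _demo_is_text_source(file_path):
    import magic
    mime = magic.Magic(mime=True).from_file(file_path)  # e.g. 'text/x-c; charset=us-ascii'
    return mime.startswith('text')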
def get_sodexo_json():
    ''' Form the proper URLs from SODEXO_DEFAULTS
        and return all wanted JSON data '''
    sodexo_base = 'http://www.sodexo.fi/ruokalistat/output/daily_json/'
    week_dates = get_current_weekdates()
    sodexo_data = dict()
    try:
        for restaurant in config.SODEXO_DEFAULTS:
            verbose_print('Fetching Sodexo: %s...' % restaurant)
            sodexo_data[restaurant] = list()
            for date in week_dates:
                sodexo_url = '%s%s/%s/%s/%s/fi' % (
                    sodexo_base,
                    config.SODEXO_ALL[restaurant],
                    date.year, date.month, date.day
                )
                sodexo_data[restaurant].append(
                    web.urlopen(sodexo_url).read().decode('utf8'))
    except KeyError:
        print('ERROR: Invalid Sodexo restaurant specified.')
        print('Use the -r flag to list all known restaurants.')
        sys.exit(1)
    return sodexo_data
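
# For reference, the endpoint shape assembled above, as a hypothetical
# helper: real restaurant ids live in config.SODEXO_ALL, and one request
# is made per date returned by get_current_weekdates().
def _demo_sodexo_url(restaurant_id, date):
    return 'http://www.sodexo.fi/ruokalistat/output/daily_json/%s/%s/%s/%s/fi' % (
        restaurant_id, date.year, date.month, date.day)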
def print_food_menu(menu):
    ''' Main function for deciding what to print and how '''
    # Get rid of unnecessary key(s), leaving only the companies.
    menu.pop('lang', None)
    verbose_print('Printing data...')
    if config.PRICE_LEVEL not in PRICE_LEVELS:
        print(ansify('Invalid PRICE_LEVEL value, defaulting back to student level.',
                     'red'))
    if config.PRINT_WHOLE_WEEK:
        print_all(menu)
    else:
        print_today(menu)
def _query_bloomfilter(self, source_norm_lines, magic_ext):
    ''' Query every patch hash list against Bloom filters
        built from the source file's n-grams '''
    source_norm_lines = source_norm_lines.split()
    if len(source_norm_lines) < common.ngram_size:
        common.verbose_print(' - skipped (%d lines)' % len(source_norm_lines))
        return False

    self._bit_vector.setall(0)
    num_ngram = len(source_norm_lines) - common.ngram_size + 1
    is_vuln_source = False
    num_ngram_processed = 0
    for i in range(0, num_ngram):
        # Keep the m/n ratio sane: once too many n-grams share one bit
        # vector, query the patches against it and start a fresh filter.
        if num_ngram_processed > common.bloomfilter_size / common.min_mn_ratio:
            common.verbose_print(' - split Bloom filters (%d n-grams)'
                                 % num_ngram_processed)
            for patch_id in range(0, self._npatch):
                #if magic_ext == self._patch_list[patch_id].file_ext:
                hash_list = self._patch_list[patch_id].hash_list
                is_match = True
                for h in hash_list:
                    if not self._bit_vector[h]:
                        is_match = False
                        break
                if is_match:
                    is_vuln_source = True
                    self._match_dict[patch_id].append(self._nsource)
                    common.verbose_print(' - match (patch #%d : source #%d)'
                                         % (patch_id, self._nsource))
                    self._nmatch += 1
            num_ngram_processed = 0
            self._bit_vector.setall(0)

        ngram = ''.join(source_norm_lines[i:i + common.ngram_size])
        hash1 = common.fnv1a_hash(ngram) & (common.bloomfilter_size - 1)
        hash2 = common.djb2_hash(ngram) & (common.bloomfilter_size - 1)
        hash3 = common.sdbm_hash(ngram) & (common.bloomfilter_size - 1)
        self._bit_vector[hash1] = 1
        self._bit_vector[hash2] = 1
        self._bit_vector[hash3] = 1
        num_ngram_processed += 1

    # Query the remaining n-grams against the last (partial) filter.
    for patch_id in range(0, self._npatch):
        #if magic_ext == self._patch_list[patch_id].file_ext:
        hash_list = self._patch_list[patch_id].hash_list
        is_match = True
        for h in hash_list:
            if not self._bit_vector[h]:
                is_match = False
                break
        if is_match:
            is_vuln_source = True
            self._match_dict[patch_id].append(self._nsource)
            common.verbose_print(' - match (patch #%d : source #%d)'
                                 % (patch_id, self._nsource))
            self._nmatch += 1
    return is_vuln_source
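
# A self-contained sketch of the membership test above. The three hash
# functions are textbook 32-bit FNV-1a, djb2 and sdbm -- stand-ins for
# common.fnv1a_hash/djb2_hash/sdbm_hash, whose exact widths may differ.
# As in the real code, `size` must be a power of two so that
# `& (size - 1)` works as a cheap modulo.
def _demo_bloom_membership(source_ngrams, patch_ngrams, size=1 << 20):
    def fnv1a(s):
        h = 0x811c9dc5
        for c in s:
            h = ((h ^ ord(c)) * 0x01000193) & 0xffffffff
        return h
    def djb2(s):
        h = 5381
        for c in s:
            h = (h * 33 + ord(c)) & 0xffffffff
        return h
    def sdbm(s):
        h = 0
        for c in s:
            h = (ord(c) + (h << 6) + (h << 16) - h) & 0xffffffff
        return h
    vector = [False] * size
    for ngram in source_ngrams:                      # build phase
        for fn in (fnv1a, djb2, sdbm):
            vector[fn(ngram) & (size - 1)] = True
    # Query phase: all patch bits set means a *possible* match only;
    # false positives exist, hence the later _exact_match() pass.
    return all(vector[fn(ngram) & (size - 1)]
               for ngram in patch_ngrams
               for fn in (fnv1a, djb2, sdbm))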
def try_loading_cache():
    ''' Try to load CACHE_FILE and return the parsed menu.
        If that is not possible, return None. '''
    if not is_cache_uptodate():
        return None
    try:
        with open(config.CACHE_FILE, 'r') as f:
            food_menu = json.loads(f.read())
    except ValueError:
        verbose_print('Cache is broken!')
        return None
    return food_menu
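
# A sketch of the intended call site: use the cache when it is fresh and
# parseable, otherwise fall back to a downloader. `fetch_fresh` is a
# hypothetical placeholder for whatever redownloads and rewrites
# CACHE_FILE.
def _demo_cache_flow(fetch_fresh):
    food_menu = try_loading_cache()
    if food_menu is None:          # cache missing, stale or broken
        food_menu = fetch_fresh()
    return food_menu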
def get_unica_html():
    ''' Form the proper URLs from UNICA_DEFAULTS
        and return all wanted HTML pages '''
    unica_base = 'http://www.unica.fi/'
    # Default to 'fi', even with a wrong configuration
    lang_jinxer = 'en/restaurants/' if config.LANG.lower() == 'en' else 'fi/ravintolat/'
    unica_data = dict()
    try:
        for restaurant in config.UNICA_DEFAULTS:
            verbose_print('Fetching Unica: %s...' % restaurant)
            unica_url = '%s%s%s/' % (unica_base, lang_jinxer, restaurant)
            unica_data[restaurant] = web.urlopen(unica_url).read().decode('utf8')
    except:  # most likely an HTTP error for an unknown restaurant page
        print('ERROR: Invalid Unica restaurant specified.')
        print('Use the -r flag to list all known restaurants.')
        sys.exit(1)
    return unica_data
def _exact_match(self):
    ''' Exact-matching test to catch Bloom filter errors '''
    print '[+] performing an exact matching test'
    start_time = time.time()
    exact_nmatch = 0
    for patch_id, source_id_list in self._match_dict.items():
        patch_norm_lines = self._patch_list[patch_id].norm_lines
        patch_norm_length = len(patch_norm_lines)
        for source_id in source_id_list:
            source_norm_lines = self._source_list[source_id].norm_lines
            source_norm_length = len(source_norm_lines)
            for i in range(0, (source_norm_length - patch_norm_length + 1)):
                patch_line = 0
                source_line = i
                while patch_norm_lines[patch_line] == source_norm_lines[source_line]:
                    patch_line += 1
                    source_line += 1
                    if patch_line == patch_norm_length:
                        common.verbose_print(
                            ' [-] exact match - %s : %s (line #%d)'
                            % (self._patch_list[patch_id].file_path,
                               self._source_list[source_id].file_path, i + 1))
                        self._context_dict[patch_id].append(
                            common.ContextInfo(
                                source_id,
                                max(0, i - common.context_line), i,
                                source_line,
                                min(source_line + common.context_line,
                                    source_norm_length - 1)))
                        exact_nmatch += 1
                        break
                    # Skip empty lines in the source while matching.
                    while (source_line < source_norm_length - patch_norm_length
                           and source_norm_lines[source_line] == ''):
                        source_line += 1
                    if source_line == source_norm_length - patch_norm_length:
                        break
    elapsed_time = time.time() - start_time
    print '[+] %d exact matches ... %.1fs\n' % (exact_nmatch, elapsed_time)
    return exact_nmatch
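
# The verification pass above, reduced to its essence: slide the
# normalized patch window over the normalized source and demand
# line-for-line equality. This hypothetical helper shows why Bloom
# filter hits still need confirmation -- colliding bits can flag
# n-grams the source never contained.
def _demo_exact_window(patch_norm_lines, source_norm_lines):
    plen = len(patch_norm_lines)
    for start in range(0, len(source_norm_lines) - plen + 1):
        if source_norm_lines[start:start + plen] == patch_norm_lines:
            return start   # offset of the first exact match
    return -1              # a Bloom filter false positive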
def is_cache_uptodate():
    ''' Check if the cache exists and compare the CACHE_FILE
        modification date to the current week's dates '''
    try:
        mtime = os.path.getmtime(config.CACHE_FILE)
    except FileNotFoundError:
        verbose_print('No cache found!')
        return False
    mdate = datetime.fromtimestamp(mtime)
    for week_date in get_current_weekdates():
        # NOTE: only the day-of-month is compared against this week's dates
        if mdate.day == week_date.day:
            verbose_print('Cache is from this week')
            return True
    verbose_print('Cache is not up to date')
    return False
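
# get_current_weekdates() is defined elsewhere in this module; this is a
# plausible sketch (an assumption, not the real implementation) of the
# contract is_cache_uptodate() relies on: date objects for the current
# Monday-based week.
def _demo_current_weekdates():
    from datetime import date, timedelta
    monday = date.today() - timedelta(days=date.today().weekday())
    return [monday + timedelta(days=n) for n in range(7)]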
def is_config_modified_since_caching(food_menu, caching_forbidden):
    ''' Compare the cached data to the current user configs in a few
        different ways. Check only restaurants and language, since the
        other settings don't depend on the cache. '''
    # Skip all of this when certain arguments forbid caching.
    if caching_forbidden:
        return False
    try:
        config_restaurants = config.SODEXO_DEFAULTS + config.UNICA_DEFAULTS
    except TypeError:
        print('ERROR: Invalid format in your default restaurants.')
        print('If you want no restaurants, use [] as the value.')
        sys.exit(1)
    rest_count = 0
    # The language must be checked because only one is cached
    if food_menu['lang'] != config.LANG.lower():
        verbose_print("Cached language doesn't match configs")
        print('Configs have been changed since caching! Redownloading...')
        food_menu.pop('lang')
        return True
    food_menu.pop('lang')
    # The restaurants should be the same
    for restaurant in food_menu.values():
        for name in restaurant:
            if name not in config_restaurants:
                verbose_print('Cached restaurants are not the same as in config')
                print('Configs have been changed since caching! Redownloading...')
                return True
            rest_count += 1
    # The number of restaurants should match as well
    if len(config_restaurants) != rest_count:
        verbose_print("The number of cached restaurants doesn't match the config")
        print('Configs have been changed since caching! Redownloading...')
        return True
    return False
def parse_food_data(data):
    ''' Parse the raw Unica HTML and Sodexo JSON into one menu dict '''
    verbose_print('Parsing data...')
    unica_menu = dict()
    for restaurant, week_html in data['unica'].items():
        verbose_print('Parsing Unica: %s...' % restaurant)
        unica_menu[restaurant] = parse_unica_html(week_html)
    sodexo_menu = dict()
    for restaurant, week_json in data['sodexo'].items():
        verbose_print('Parsing Sodexo: %s...' % restaurant)
        sodexo_menu[restaurant] = parse_sodexo_json(week_json)
    # Avoid putting empty data in the dictionary
    food_data = dict()
    if sodexo_menu:
        food_data['sodexo'] = sodexo_menu
    if unica_menu:
        food_data['unica'] = unica_menu
    return food_data
    try:
        args = parser.parse_args()
        common.ngram_size = args.ngram_size
        common.context_line = args.context_line
        common.verbose_mode = args.verbose_mode
        return args.patch_path, args.source_path
    except IOError, msg:
        parser.error(str(msg))


if __name__ == '__main__':
    # parse arguments
    start_time = time.time()
    patch_path, source_path = parse_args()
    common.verbose_print('[-] ngram_size : %d' % common.ngram_size)
    common.verbose_print('[-] context_line : %d' % common.context_line)
    common.verbose_print('[-] verbose_mode : %s' % common.verbose_mode)
    common.verbose_print('[-] patch_path : %s' % patch_path)
    common.verbose_print('[-] source_path : %s' % source_path)

    # initialize a magic cookie pointer, supporting both python-magic
    # variants: the ctypes binding (magic.open) and the pip package
    # (magic.Magic)
    try:
        common.magic_cookie = magic.open(magic.MAGIC_MIME)
        common.magic_cookie.load()
    except AttributeError:
        common.magic_cookie = magic.Magic(mime=True, uncompress=True)
    common.verbose_print('[-] initialized magic cookie\n')

    # traverse patch files
    patch = patchloader.PatchLoader()
def _process(self, patch_path):
    ''' Normalize a patch file and build a hash list '''
    patch_filename = patch_path.split('/')[-1]
    patch_file = open(patch_path, 'r')
    patch_lines = patch_file.readlines()
    patch_file.close()

    magic_ext = None
    process_flag = False
    diff_file = ''
    diff_cnt = 0
    diff_vuln_lines = []
    diff_orig_lines = []

    def flush_hunk():
        ''' Normalize and hash the pre-patch lines collected so far '''
        if not diff_vuln_lines:
            return
        diff_norm_lines = self._normalize(''.join(diff_vuln_lines),
                                          magic_ext).split()
        if len(diff_norm_lines) >= common.ngram_size:
            common.verbose_print(' %s %d (ext: %d)'
                                 % (diff_file, diff_cnt, magic_ext))
            path = '[%s] %s #%d' % (patch_filename, diff_file, diff_cnt)
            hash_list = self._build_hash_list(diff_norm_lines)
            self._patch_list.append(
                common.PatchInfo(path, magic_ext, ''.join(diff_orig_lines),
                                 diff_norm_lines, hash_list))
        else:
            common.verbose_print(' %s %d (ext: %d) - skipped (%d lines)'
                                 % (diff_file, diff_cnt, magic_ext,
                                    len(diff_norm_lines)))
        del diff_vuln_lines[:]
        del diff_orig_lines[:]

    for line in patch_lines:
        if line.startswith('--- '):
            # a new file section begins: flush the hunk collected so far
            flush_hunk()
            diff_path = line.split()[1]
            if diff_path == '/dev/null':
                process_flag = False
            else:
                process_flag = True
                diff_cnt = 0
                diff_file = diff_path.split('/')[-1]
                magic_ext = self._get_file_type(diff_file)
        elif process_flag:
            if line.startswith('+++ '):
                diff_path = line.split()[1]
                if diff_path == '/dev/null':
                    process_flag = False
            elif line.startswith('@@'):
                # a new hunk begins: flush the previous one
                flush_hunk()
                diff_cnt += 1
            elif line.startswith('-'):
                # removed line: part of the vulnerable (pre-patch) code
                diff_vuln_lines.append(line[1:])
                diff_orig_lines.append('<font color="#AA0000">')
                diff_orig_lines.append(
                    line.replace('<', '&lt;').replace('>', '&gt;'))
                diff_orig_lines.append('</font>')
            elif line.startswith('+'):
                # added line: kept for the HTML report only
                diff_orig_lines.append('<font color="#00AA00">')
                diff_orig_lines.append(
                    line.replace('<', '&lt;').replace('>', '&gt;'))
                diff_orig_lines.append('</font>')
            elif line.startswith(' '):
                # context line: part of the vulnerable code as well
                diff_vuln_lines.append(line[1:])
                diff_orig_lines.append(
                    line.replace('<', '&lt;').replace('>', '&gt;'))

    # flush the last hunk of the file
    flush_hunk()
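
# A simplified sketch of the line routing above, on its own: '-' and ' '
# lines rebuild the pre-patch (vulnerable) code that gets normalized and
# hashed, while '+' lines are kept only for the HTML report. This
# hypothetical helper ignores the '---'/'+++' headers' /dev/null handling
# and the per-hunk flushing that _process() performs.
def _demo_route_patch_lines(patch_lines):
    vuln_lines, report_lines = [], []
    for line in patch_lines:
        if (line.startswith('--- ') or line.startswith('+++ ')
                or line.startswith('@@')):
            continue
        if line.startswith('-'):
            vuln_lines.append(line[1:])      # removed: vulnerable code
        elif line.startswith('+'):
            report_lines.append(line[1:])    # added: report only
        elif line.startswith(' '):
            vuln_lines.append(line[1:])      # context: both views
            report_lines.append(line[1:])
    return vuln_lines, report_lines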