def test_levenshtein(self):
        """Check the string-similarity helpers on tiny hand-computed inputs."""
        # distance: number of additions, deletions, updates
        one_insertion = distance('a', 'ab')
        eq(one_insertion, 1)

        # ratio: similarity of two strings, in [0, 1]
        different_strings = ratio('a', 'b')
        eq(different_strings, 0)
        equal_strings = ratio('a', 'a')
        eq(equal_strings, 1)

        # setratio: in [0, 1], compares two sets by best fit,
        # order doesn't matter
        same_members = setratio(['a', 'b'], ['b', 'a'])
        eq(same_members, 1.0)
        no_common_members = setratio(['c', 'd'], ['b', 'a'])
        eq(no_common_members, 0)

        # seqratio: in [0, 1]; the expected values below show that order
        # matters here
        swapped_order = seqratio(['a', 'b'], ['b', 'a'])
        eq(swapped_order, 0.5)
        same_order = seqratio(['a', 'b'], ['a', 'b'])
        eq(same_order, 1.0)
        prefix_only = seqratio(['a'], ['a', 'b'])
        eq(prefix_only, 2 / 3)
    def test_evaluate(self):
        """Score the parser against the annotated mails and plot the accuracy.

        Each mail in ``self.mails_denotated`` is run through
        ``self.quagga._predict`` and ``self.quagga._parse`` and compared block
        by block (by position) with its converted annotation. Fields are
        scored with ``ratio`` (from, sent, subject, type), ``setratio``
        (to, cc) or ``seqratio`` (raw_header, text).

        A mail's score per field is the mean over its blocks: ``text`` is
        averaged over all blocks, every other field over all blocks except
        the first one (and is 1 when the mail has a single block). One line
        per scored mail is plotted and the mean over all scored mails is
        printed.

        Mails whose parsed and annotated block counts differ, or that have
        no blocks at all, are skipped and do not count towards the mean.
        """
        fields = ('from', 'to', 'cc', 'sent', 'subject', 'raw_header', 'type',
                  'text')
        overall_accuracy = {}
        mail_count = 0

        for mail in self.mails_denotated:
            raw = mail.original_email
            email_input = EmailMessage(mail.path, mail.filename,
                                       ep.Parser().parsestr(raw))
            predicted = self.quagga._predict(email_input.clean_body)
            parsed = self.quagga._parse(predicted, email_input)

            denotation_blocks = DenotationBlockConverter.convert(
                mail.denotations)
            parsed_blocks = parsed['blocks']
            annotated_blocks = denotation_blocks['blocks']
            block_count = len(annotated_blocks)

            # Blocks are paired by position, so a mail with differing block
            # counts cannot be scored. Check this before cleaning and scoring
            # instead of throwing the work away afterwards.
            if len(parsed_blocks) != block_count:
                print("blocks have different length, skipping")
                continue
            # Without blocks the per-mail averages would divide by zero.
            if block_count == 0:
                print("mail has no blocks, skipping")
                continue

            for i, (parsed_block, annotated_block) in enumerate(
                    zip(parsed_blocks, annotated_blocks)):

                # since some annotations are not consistent (some have day before date)
                # we normalize the parsed and the annotated stuff, only the outcome matters anyway
                annotated_block['sent'] = Normalizer.normalize_sent(
                    annotated_block['sent'])
                self.clean_parsed_block(parsed_block)
                self.clean_block(annotated_block)

                # Fields that are not scored for a block keep the perfect
                # score of 1.
                block_accuracy = dict.fromkeys(fields, 1)
                both_root = (parsed_block['type'] == 'root'
                             and annotated_block['type'] == 'root')
                if i == 0 and both_root:
                    # Only the text is compared for a leading root block.
                    block_accuracy['text'] = seqratio(parsed_block['text'],
                                                      annotated_block['text'])
                else:
                    block_accuracy['from'] = ratio(parsed_block['raw_from'],
                                                   annotated_block['from'])
                    block_accuracy['to'] = setratio(parsed_block['raw_to'],
                                                    annotated_block['to'])
                    block_accuracy['cc'] = setratio(parsed_block['raw_cc'],
                                                    annotated_block['cc'])
                    block_accuracy['sent'] = ratio(parsed_block['sent'],
                                                   annotated_block['sent'])
                    block_accuracy['subject'] = ratio(
                        parsed_block['subject'], annotated_block['subject'])
                    block_accuracy['raw_header'] = seqratio(
                        parsed_block['raw_header'],
                        annotated_block['raw_header'])
                    block_accuracy['type'] = ratio(parsed_block['type'],
                                                   annotated_block['type'])
                    block_accuracy['text'] = seqratio(parsed_block['text'],
                                                      annotated_block['text'])

                annotated_block['error'] = block_accuracy

            # Sum the block scores; the first block only contributes its text.
            mail_accuracy = dict.fromkeys(fields, 0)
            for i, annotated_block in enumerate(annotated_blocks):
                if i == 0:
                    mail_accuracy['text'] += annotated_block['error']['text']
                else:
                    for key, value in annotated_block['error'].items():
                        mail_accuracy[key] += value

            # Turn the sums into means.
            for key in mail_accuracy:
                if key == 'text':
                    mail_accuracy[key] /= block_count
                elif block_count == 1:
                    mail_accuracy[key] = 1
                else:
                    mail_accuracy[key] /= block_count - 1

            # Lists instead of dict views: not every matplotlib version
            # accepts views as plot data.
            plt.plot(list(mail_accuracy.keys()),
                     list(mail_accuracy.values()),
                     label=mail.filename)
            for key, value in mail_accuracy.items():
                overall_accuracy[key] = overall_accuracy.get(key, 0) + value
            mail_count += 1

        # overall_accuracy is empty when no mail was scored, so this loop
        # never divides by a zero mail_count.
        for key in overall_accuracy:
            overall_accuracy[key] /= mail_count

        print(overall_accuracy)
        # plt.legend()
        plt.show()
Beispiel #3
0
	def image_match(self, id, image_path, default_match_rate=.79, VERBOSE='SEMI'):
	
		#assign append to keep rom_list from being evaluated each iteration
		rom_list = []
		missing_list = []
		
		rom_list_append = rom_list.append
		missing_append = missing_list.append
		
		roms = [os.path.split(os.path.splitext(item)[0])[-1] for item in glob.glob( image_path + '*.*')]
		
		print
		print id, image_path
		print
		
		for index, rom in enumerate(roms):
	
			#set minimum match ratio
			hi_score = default_match_rate
			best_match_game = None
			
			#build search query
			#we are grabbing any entry that has at least 1 matching search term
			current_file_search_terms = unicode(self.normalize(rom))
			search_query = '%" OR search_terms LIKE "%'.join(current_file_search_terms.split())
			for entry in self.GC.execute('SELECT id, search_terms, title FROM image_match WHERE system=' + str(id) + ' AND (search_terms LIKE "%' + search_query + '%")').fetchall():
				
				Lratio = setratio( unicode(current_file_search_terms).split(), entry[1].split() )
				if Lratio > hi_score:
				
					#check if check to make sure sequels don't get mat5hed to originals
					if [x for x in current_file_search_terms if x.isdigit()] == [y for y in entry[1] if y.isdigit()]:
						hi_score = Lratio
						best_match_game = entry
			
			#Let user know current progress
			if VERBOSE:
				status = r"%10d/%d roms  [%3.2f%%]" % (index+1, len(roms), (index+1) * 100. / len(roms))
				status = status + chr(8)*(len(status)+1)
				sys.stdout.write('%s      \r' % (status))
				sys.stdout.flush()
			
			#in verbose mode: ask if game matches
			if (VERBOSE == "SEMI" or VERBOSE == "FULL") and hi_score < .94 and best_match_game:
				best_match_game = best_match_game if self.raw_input_with_timeout('Does %s match %s - %s' % (pcolor('cyan', "["+ rom +"]"), 
																										pcolor('cyan', "["+ best_match_game[2] +"]"),
																										pcolor('yellow', "["+"{0:.0f}%".format(float(hi_score) * 100)+"]")), timeout = 10.0) else None
			if VERBOSE == 'FULL':
				try:
					if best_match_game:
						print 'Closest match for %s is %s - %s' % (pcolor('green', "["+ rom +"]"), pcolor('green', "["+ best_match_game[2] +"]"), pcolor('yellow', "["+"{0:.0f}%".format(float(hi_score) * 100)+"]"))
					else:
						print 'No match found for %s' % (pcolor('red', "[" + rom + "]"))
				except:
					pass
			
			
			#If a suitable match was found, pull info
			if best_match_game:
				rom_list_append(( rom, best_match_game[0] ))
			else:
				missing_append(( id, rom ))


		
		self.GC.executemany('UPDATE image_match SET image_file=? WHERE id=?', rom_list)
		self.GC.executemany('INSERT INTO missing_entries (system, title) VALUES (?, ?)', missing_list)
		self.GAMES.commit()
		print
Beispiel #4
0
	def console_match(self, platform, get_rom_name_with_crc, default_match_rate, VERBOSE, RUN_WHOLE_SYSTEM_FOLDER, dont_match):
		"""Match the rom files of one platform against the console table and store them in local_roms.

		Python 2 code (print statements, unicode).

		platform: mapping; the keys read here are 'scraper_id', 'label', 'rom_path',
			'id', 'include_extension', 'include_full_path' and 'command'
		get_rom_name_with_crc: passed through unchanged to self.get_name
		default_match_rate: a candidate must score strictly above this setratio value
		VERBOSE: any true value shows a progress line; 'SEMI' and 'FULL' ask for
			confirmation of matches scoring below .94; 'FULL' also prints the outcome
		RUN_WHOLE_SYSTEM_FOLDER: when true, all local_roms rows of the platform are
			deleted first and every rom returned by get_stored_roms is processed
		dont_match: database matching and image lookup only run when this == False;
			otherwise every rom is stored with its file-derived values and the
			flag 'no_match,'

		Returns None. One row per processed rom is inserted into local_roms and
		self.LOCAL is committed.
		"""
	
		#column names of the console table, used further down to turn a fetched row into a dict
		column_names = [item[1] for item in self.GC.execute('PRAGMA table_info(console)').fetchall()]
						
		#prepare to get_name for non-arcade
		#find_name is driven with send(), so get_name is presumably a generator; send(None) primes it
		find_name = self.get_name(platform['scraper_id'], get_rom_name_with_crc)
		find_name.send(None)
			
		#load rom filenames, initialize rom_list to return matches
		print 'Fetching %s rom list...' % pcolor('cyan', platform['label'])
		roms = self.get_stored_roms(platform['rom_path'])
		
		#Create Temp table holding only the console rows of the current scraper system(s)
		print 'Connecting to PiPlay Database...'
		self.GC.execute('DROP TABLE IF EXISTS temp_system')
		#WHERE 0 copies the column layout of console without any rows
		self.GC.execute('CREATE TEMP TABLE temp_system AS SELECT * FROM console WHERE 0')
		query = 'INSERT INTO temp_system  SELECT * FROM console WHERE system in (%s)' % platform['scraper_id'] 
		self.GC.execute(query)
		self.GAMES.commit()
		
		if RUN_WHOLE_SYSTEM_FOLDER:
			#delete all entries for system
			query = 'DELETE FROM local_roms WHERE system = {platform_id}'.format(platform_id = platform['id'])
			self.LC.execute(query)
		else:
			if dont_match == False:
				#delete all entries that no longer have roms + previously unmatched entries
				#NOTE(review): the IN list is the repr of a Python tuple pasted into the SQL text (the
				#one-element case is special-cased because a 1-tuple repr has a trailing comma); file
				#names containing quotes or non-ASCII bytes may not survive this - confirm
				query_roms = tuple([x.encode('UTF8') for x in roms]) if len(roms) != 1 else ("('" + roms[0].encode('UTF8') + "')")
				query = 'DELETE FROM local_roms WHERE system = {0} and (rom_file not in {1} or flags like "%no_match%")'.format( platform['id'], query_roms )
				self.LC.execute(query)
			
			#remove any remaining entries from list of roms
			#(roms that still have a local_roms row are not processed again; the set difference does not keep the order)
			query = 'SELECT rom_file FROM local_roms WHERE system = {platform_id}'.format( platform_id = platform['id'])
			roms = list( set(roms) - set(item[0] for item in self.LC.execute(query).fetchall()) )
			
		self.LOCAL.commit()

		if roms:
			#assign append to keep rom_list from being evaluated each iteration
			rom_list = []
			rom_list_append = rom_list.append
			
			for index, rom in enumerate(roms):
				
				#get rom name
				#the value sent back for the rom path is used as the search terms; 'get_ready' is sent
				#before the next rom - NOTE(review): protocol defined by get_name, not visible here
				current_file_search_terms = find_name.send(os.path.join(platform['rom_path'], rom))
				find_name.send('get_ready')
				
				#create run command
				if platform['include_extension']: 
					build_command = rom
				else:
					build_command = os.path.splitext(rom)[0]
				
				if platform['include_full_path']:
					build_command = os.path.join(platform['rom_path'], build_command)
				
				game_command = platform['command'] + ' "' + build_command + '"'
				
				#update what is already known about current entry
				game_info = Game(title = rom, system = platform['id'], search_terms = current_file_search_terms, command = game_command, rom_path = platform['rom_path'], rom_file = rom)

				#set minimum match ratio
				hi_score = default_match_rate
				best_match_game = None
				
				if dont_match == False:
					#build search query
					#we are grabbing any entry that has at least 1 matching search term
					#NOTE(review): the terms are spliced into the SQL text inside double quotes; a '"' in
					#the terms would break or change the statement - consider bound parameters
					search_query = '%" OR search_terms LIKE "%'.join(unicode(current_file_search_terms).split())
					for entry in self.GC.execute('SELECT id, search_terms, title, system FROM temp_system WHERE (search_terms LIKE "%' + search_query + '%")').fetchall():
						
						#first pass: score word list against word list
						Lratio = setratio( unicode(current_file_search_terms).split(), entry[1].split() )
						if Lratio > hi_score:
						
							#make sure sequels don't get matched to originals: the digit characters of both sides must be equal
							if [x for x in current_file_search_terms if x.isdigit()] == [y for y in entry[1] if y.isdigit()]:
								hi_score = Lratio
								best_match_game = entry
					
					#if no satisfactory match found, do second pass comparing each letter separately
					#(runs the same query again; only the scoring differs)
					if not best_match_game:
						for entry in self.GC.execute('SELECT id, search_terms, title, system FROM temp_system WHERE (search_terms LIKE "%' + search_query + '%")').fetchall():
						
							Lratio = setratio( map(unicode,current_file_search_terms), map(unicode, entry[1]) )
							if Lratio > hi_score:
							
								#make sure sequels don't get matched to originals
								if [x for x in current_file_search_terms if x.isdigit()] == [y for y in entry[1] if y.isdigit()]:
									hi_score = Lratio
									best_match_game = entry
								
								
					#in verbose mode: ask if game matches
					#a false result from raw_input_with_timeout discards the candidate
					if (VERBOSE == "SEMI" or VERBOSE == "FULL") and hi_score < .94 and best_match_game:
						best_match_game = best_match_game if self.raw_input_with_timeout('Does %s match %s - %s' % (pcolor('cyan', "["+ rom +"]"), 
																																								pcolor('cyan', "["+ best_match_game[2] +"]"),
																																								pcolor('yellow', "["+"{0:.0f}%".format(float(hi_score) * 100)+"]")), timeout = 10.0) else None
					if VERBOSE == "FULL":
						if best_match_game:
							print 'Closest match for %s is %s - %s' % (pcolor('green', "["+ rom +"]"), pcolor('green', "["+ best_match_game[2] +"]"), pcolor('yellow', "["+"{0:.0f}%".format(float(hi_score) * 100)+"]"))
						else:
							print 'No match found for %s' % (pcolor('red', "[" + rom + "]"))
							
				#Let user know current progress
				if VERBOSE:
					status = r"%10d/%d roms  [%3.2f%%]" % (index + 1, len(roms), (index + 1) * 100. / len(roms))
					status = status + chr(8)*(len(status)+1)
					sys.stdout.write('%s      \r' % (status))
					sys.stdout.flush()
				
				
				#If a suitable match was found, pull info
				if best_match_game:
					#full temp_system row of the match as a dict keyed by the console column names
					temp_game_info = dict(zip(column_names, self.GC.execute('SELECT * from temp_system where id=?', (best_match_game[0],)).fetchone()))
					
					game_info.title = temp_game_info['title']
					game_info.search_terms = temp_game_info['search_terms']
					game_info.release_date = temp_game_info['release_date']
					game_info.overview = temp_game_info['overview']
					game_info.esrb = temp_game_info['esrb']
					game_info.genres = temp_game_info['genres']
					game_info.players = temp_game_info['players']
					game_info.coop = temp_game_info['coop']
					game_info.publisher = temp_game_info['publisher']
					game_info.developer = temp_game_info['developer']
					game_info.rating = temp_game_info['rating']
				else:
					#this flag is what the 'flags like "%no_match%"' cleanup above looks for on a later run
					game_info.flags = 'no_match,'
				
				#if name contains brackets [] with a minus '-' inside, glob will error out
				#NOTE(review): the image lookup is driven by exceptions - an empty glob result raises
				#IndexError on [0], a missing match raises TypeError on best_match_game[3], and a failed
				#first glob leaves temp_image_path unbound; the bare excepts turn all of these into the
				#next fallback (and would also hide unrelated errors)
				if dont_match == False:
					try:
						#prefer (user added) image, named same as rom + any extension
						temp_image_path = glob.glob( os.path.join( os.path.join(platform['rom_path'], 'images/'), os.path.splitext(rom)[0] ) + '.*')
						game_info.image_file = temp_image_path[0]
					except:
						try:
							#if no rom named image, then find title named image
							if not game_info.image_file:
							
								image_search = self.GC.execute('SELECT image_file FROM image_match WHERE system=? and id=?', (best_match_game[3], best_match_game[0])).fetchone()[0]
								if image_search: image_search =  [os.path.join(platform['rom_path'], 'images/') + image_search + '.*',
																				os.path.join( os.path.join(platform['rom_path'], 'images/'), self.strip_accents(temp_game_info['title']) + '.*')]
								for image in image_search:
									temp_image_path.extend( glob.glob( image ) )
								game_info.image_file = temp_image_path[0]
						except:
							#if no image found, default to rom name with no extension. Boxart thread in romlistscene will try both .jpg and .png extensions
							game_info.image_file = os.path.join( os.path.join(platform['rom_path'], 'images/'), os.path.splitext(rom)[0] )
				else:
					game_info.image_file = os.path.join( os.path.join(platform['rom_path'], 'images/'), os.path.splitext(rom)[0] )
				
				
				#one tuple per rom, in the column order of the INSERT below (21 values)
				rom_list_append((game_info.id, game_info.system,
										game_info.title, game_info.search_terms,
										None, None, #parent, cloneof -> for arcade
										game_info.release_date, game_info.overview,
										game_info.esrb, game_info.genres,
										game_info.players, game_info.coop,
										game_info.publisher, game_info.developer,
										game_info.rating, game_info.command,
										game_info.rom_file, game_info.rom_path,
										game_info.image_file, 0, game_info.flags))
			
			#('?,' * 21)[:-1] builds 21 comma separated placeholders, one per listed column
			self.LC.executemany('INSERT INTO local_roms '  + 
				'(id, system, title, search_terms, parent, cloneof, release_date, overview, esrb, genres, ' +
				'players, coop, publisher, developer, rating, command, rom_file, rom_path, image_file, number_of_runs, flags) ' +
				'VALUES (' + ('?,' * 21)[:-1] + ')', rom_list)
			self.LOCAL.commit()
			print	
Beispiel #5
0
    def matching_distance(self, tokens_a, tokens_b, string_type):
        """ Return a similarity score for two token lists.

            <tokens_a>: List of Tokens A
            <tokens_b>: List of Tokens B
            <string_type>: Type of distance algorithm
                           0 = Levenshtein edit-distance,
                           1 = FREE SLOT
                           2 = FREE SLOT
                           3 = FREE SLOT
                           4 = Similarity ratio [Not final]

            Type 0 returns a value in [0, 1], where 1 means that every
            token of A occurs in B and both lists have the same length.
            Type 4 returns whatever setratio() returns for the two lists.
            Every other type returns 0.0.
        """

        _distance_fin = 0.0

        # 0: Levenshtein edit-distance
        if string_type == 0:
            if tokens_a:
                _distance_lst = []
                for _token_a in tokens_a:
                    if _token_a in tokens_b:
                        # exact match somewhere in B
                        _distance_lst.append(0.0)
                    elif tokens_b:
                        # Edit distance relative to the length of the A
                        # token; an empty token is treated as length 1 so
                        # it cannot divide by zero.
                        _token_len = float(max(len(_token_a), 1))
                        # stores the smallest score in <_distance_lst>
                        # Note: smallest score = similar A and B tokens
                        _distance_lst.append(
                            min(distance(_token_a, _token_b) / _token_len
                                for _token_b in tokens_b))
                    else:
                        # nothing to compare against
                        _distance_lst.append(1.0)

                _distance_fin = sum(_distance_lst) / float(len(_distance_lst))
            elif tokens_b:
                # A is empty but B is not: maximal distance instead of the
                # former division by zero.
                _distance_fin = 1.0
            # Both lists empty: the distance stays 0.0, i.e. identical.

            # Each token of length difference costs 0.1.
            _distance_fin += abs(len(tokens_a) - len(tokens_b)) / 10.0

            # Convert the distance into a similarity, clamped at 0.
            if _distance_fin > 1:
                _distance_fin = 0.0
            else:
                _distance_fin = 1 - _distance_fin

        # 1: FREE SLOT (was planned as edit distance with transpositions)
        # 2: FREE SLOT (was planned as Jaccard distance)
        # 3: FREE SLOT (was planned as MASI distance)

        # 4: Similarity ratio - Compute similarity ratio of two strings sets.
        #    The best match between any strings in the first set and the second set
        #    (passed as sequences) is attempted. I.e., the order doesn't matter here.
        elif string_type == 4:
            _distance_fin = setratio(tokens_a, tokens_b)

        return _distance_fin
Beispiel #6
0
	def image_match(self, id, image_path, default_match_rate=.79, VERBOSE='SEMI'):
	
		#assign append to keep rom_list from being evaluated each iteration
		rom_list = []
		missing_list = []
		
		rom_list_append = rom_list.append
		missing_append = missing_list.append
		
		roms = [os.path.split(os.path.splitext(item)[0])[-1] for item in glob.glob( image_path + '*.*')]
		
		print
		print id, image_path
		print
		
		for index, rom in enumerate(roms):
	
			#set minimum match ratio
			hi_score = default_match_rate
			best_match_game = None
			
			#build search query
			#we are grabbing any entry that has at least 1 matching search term
			current_file_search_terms = unicode(self.normalize(rom))
			search_query = '%" OR search_terms LIKE "%'.join(current_file_search_terms.split())
			for entry in self.GC.execute('SELECT id, search_terms, title FROM image_match WHERE system=' + str(id) + ' AND (search_terms LIKE "%' + search_query + '%")').fetchall():
				
				Lratio = setratio( unicode(current_file_search_terms).split(), entry[1].split() )
				if Lratio > hi_score:
				
					#check if check to make sure sequels don't get mat5hed to originals
					if [x for x in current_file_search_terms if x.isdigit()] == [y for y in entry[1] if y.isdigit()]:
						hi_score = Lratio
						best_match_game = entry
			
			#Let user know current progress
			if VERBOSE:
				status = r"%10d/%d roms  [%3.2f%%]" % (index+1, len(roms), (index+1) * 100. / len(roms))
				status = status + chr(8)*(len(status)+1)
				sys.stdout.write('%s      \r' % (status))
				sys.stdout.flush()
			
			#in verbose mode: ask if game matches
			if (VERBOSE == "SEMI" or VERBOSE == "FULL") and hi_score < .94 and best_match_game:
				best_match_game = best_match_game if self.raw_input_with_timeout('Does %s match %s - %s' % (pcolor('cyan', "["+ rom +"]"), 
																																						pcolor('cyan', "["+ best_match_game[2] +"]"),
																																						pcolor('yellow', "["+"{0:.0f}%".format(float(hi_score) * 100)+"]")), timeout = 10.0) else None
			if VERBOSE == 'FULL':
				try:
					if best_match_game:
						print 'Closest match for %s is %s - %s' % (pcolor('green', "["+ rom +"]"), pcolor('green', "["+ best_match_game[2] +"]"), pcolor('yellow', "["+"{0:.0f}%".format(float(hi_score) * 100)+"]"))
					else:
						print 'No match found for %s' % (pcolor('red', "[" + rom + "]"))
				except:
					pass
			
			
			#If a suitable match was found, pull info
			if best_match_game:
				rom_list_append(( rom, best_match_game[0] ))
			else:
				missing_append(( id, rom ))


		
		self.GC.executemany('UPDATE image_match SET image_file=? WHERE id=?', rom_list)
		self.GC.executemany('INSERT INTO missing_entries (system, title) VALUES (?, ?)', missing_list)
		self.GAMES.commit()
		print
Beispiel #7
0
	def console_match(self, platform, get_rom_name_with_crc, default_match_rate, VERBOSE, RUN_WHOLE_SYSTEM_FOLDER, dont_match):
		"""Match the rom files of one platform against the console table and store them in local_roms.

		Python 2 code (print statements, unicode).

		platform: mapping; the keys read here are 'scraper_id', 'label', 'rom_path',
			'id', 'include_extension', 'include_full_path' and 'command'
		get_rom_name_with_crc: passed through unchanged to self.get_name
		default_match_rate: a candidate must score strictly above this setratio value
		VERBOSE: any true value shows a progress line; 'SEMI' and 'FULL' ask for
			confirmation of matches scoring below .94; 'FULL' also prints the outcome
		RUN_WHOLE_SYSTEM_FOLDER: when true, all local_roms rows of the platform are
			deleted first and every rom returned by get_stored_roms is processed
		dont_match: database matching and image lookup only run when this == False;
			otherwise every rom is stored with its file-derived values and the
			flag 'no_match,'

		Returns None. One row per processed rom is inserted into local_roms and
		self.LOCAL is committed.
		"""
	
		#column names of the console table, used further down to turn a fetched row into a dict
		column_names = [item[1] for item in self.GC.execute('PRAGMA table_info(console)').fetchall()]
						
		#prepare to get_name for non-arcade
		#find_name is driven with send(), so get_name is presumably a generator; send(None) primes it
		find_name = self.get_name(platform['scraper_id'], get_rom_name_with_crc)
		find_name.send(None)
			
		#load rom filenames, initialize rom_list to return matches
		print 'Fetching %s rom list...' % pcolor('cyan', platform['label'])
		roms = self.get_stored_roms(platform['rom_path'])
		
		#Create Temp table holding only the console rows of the current scraper system(s)
		print 'Connecting to PiPlay Database...'
		self.GC.execute('DROP TABLE IF EXISTS temp_system')
		#WHERE 0 copies the column layout of console without any rows
		self.GC.execute('CREATE TEMP TABLE temp_system AS SELECT * FROM console WHERE 0')
		query = 'INSERT INTO temp_system  SELECT * FROM console WHERE system in (%s)' % platform['scraper_id'] 
		self.GC.execute(query)
		self.GAMES.commit()
		
		if RUN_WHOLE_SYSTEM_FOLDER:
			#delete all entries for system
			query = 'DELETE FROM local_roms WHERE system = {platform_id}'.format(platform_id = platform['id'])
			self.LC.execute(query)
		else:
			if dont_match == False:
				#delete all entries that no longer have roms + previously unmatched entries
				#NOTE(review): the IN list is the repr of a Python tuple pasted into the SQL text (the
				#one-element case is special-cased because a 1-tuple repr has a trailing comma); file
				#names containing quotes or non-ASCII bytes may not survive this - confirm
				query_roms = tuple([x.encode('UTF8') for x in roms]) if len(roms) != 1 else ("('" + roms[0].encode('UTF8') + "')")
				query = 'DELETE FROM local_roms WHERE system = {0} and (rom_file not in {1} or flags like "%no_match%")'.format( platform['id'], query_roms )
				self.LC.execute(query)
			
			#remove any remaining entries from list of roms
			#(roms that still have a local_roms row are not processed again; the set difference does not keep the order)
			query = 'SELECT rom_file FROM local_roms WHERE system = {platform_id}'.format( platform_id = platform['id'])
			roms = list( set(roms) - set(item[0] for item in self.LC.execute(query).fetchall()) )
			
		self.LOCAL.commit()

		if roms:
			#assign append to keep rom_list from being evaluated each iteration
			rom_list = []
			rom_list_append = rom_list.append
			
			for index, rom in enumerate(roms):
				
				#get rom name
				#the value sent back for the rom path is used as the search terms; 'get_ready' is sent
				#before the next rom - NOTE(review): protocol defined by get_name, not visible here
				current_file_search_terms = find_name.send(os.path.join(platform['rom_path'], rom))
				find_name.send('get_ready')
				
				#create run command
				if platform['include_extension']: 
					build_command = rom
				else:
					build_command = os.path.splitext(rom)[0]
				
				if platform['include_full_path']:
					build_command = os.path.join(platform['rom_path'], build_command)
				
				game_command = platform['command'] + ' "' + build_command + '"'
				
				#update what is already known about current entry
				game_info = Game(title = rom, system = platform['id'], search_terms = current_file_search_terms, command = game_command, rom_path = platform['rom_path'], rom_file = rom)

				#set minimum match ratio
				hi_score = default_match_rate
				best_match_game = None
				
				if dont_match == False:
					#build search query
					#we are grabbing any entry that has at least 1 matching search term
					#NOTE(review): the terms are spliced into the SQL text inside double quotes; a '"' in
					#the terms would break or change the statement - consider bound parameters
					search_query = '%" OR search_terms LIKE "%'.join(unicode(current_file_search_terms).split())
					for entry in self.GC.execute('SELECT id, search_terms, title, system FROM temp_system WHERE (search_terms LIKE "%' + search_query + '%")').fetchall():
						
						#first pass: score word list against word list
						Lratio = setratio( unicode(current_file_search_terms).split(), entry[1].split() )
						if Lratio > hi_score:
						
							#make sure sequels don't get matched to originals: the digit characters of both sides must be equal
							if [x for x in current_file_search_terms if x.isdigit()] == [y for y in entry[1] if y.isdigit()]:
								hi_score = Lratio
								best_match_game = entry
					
					#if no satisfactory match found, do second pass comparing each letter separately
					#(runs the same query again; only the scoring differs)
					if not best_match_game:
						for entry in self.GC.execute('SELECT id, search_terms, title, system FROM temp_system WHERE (search_terms LIKE "%' + search_query + '%")').fetchall():
						
							Lratio = setratio( map(unicode,current_file_search_terms), map(unicode, entry[1]) )
							if Lratio > hi_score:
							
								#make sure sequels don't get matched to originals
								if [x for x in current_file_search_terms if x.isdigit()] == [y for y in entry[1] if y.isdigit()]:
									hi_score = Lratio
									best_match_game = entry
								
								
					#in verbose mode: ask if game matches
					#a false result from raw_input_with_timeout discards the candidate
					if (VERBOSE == "SEMI" or VERBOSE == "FULL") and hi_score < .94 and best_match_game:
						best_match_game = best_match_game if self.raw_input_with_timeout('Does %s match %s - %s' % (pcolor('cyan', "["+ rom +"]"), 
																																								pcolor('cyan', "["+ best_match_game[2] +"]"),
																																								pcolor('yellow', "["+"{0:.0f}%".format(float(hi_score) * 100)+"]")), timeout = 10.0) else None
					if VERBOSE == "FULL":
						if best_match_game:
							print 'Closest match for %s is %s - %s' % (pcolor('green', "["+ rom +"]"), pcolor('green', "["+ best_match_game[2] +"]"), pcolor('yellow', "["+"{0:.0f}%".format(float(hi_score) * 100)+"]"))
						else:
							print 'No match found for %s' % (pcolor('red', "[" + rom + "]"))
							
				#Let user know current progress
				if VERBOSE:
					status = r"%10d/%d roms  [%3.2f%%]" % (index + 1, len(roms), (index + 1) * 100. / len(roms))
					status = status + chr(8)*(len(status)+1)
					sys.stdout.write('%s      \r' % (status))
					sys.stdout.flush()
				
				
				#If a suitable match was found, pull info
				if best_match_game:
					#full temp_system row of the match as a dict keyed by the console column names
					temp_game_info = dict(zip(column_names, self.GC.execute('SELECT * from temp_system where id=?', (best_match_game[0],)).fetchone()))
					
					game_info.title = temp_game_info['title']
					game_info.search_terms = temp_game_info['search_terms']
					game_info.release_date = temp_game_info['release_date']
					game_info.overview = temp_game_info['overview']
					game_info.esrb = temp_game_info['esrb']
					game_info.genres = temp_game_info['genres']
					game_info.players = temp_game_info['players']
					game_info.coop = temp_game_info['coop']
					game_info.publisher = temp_game_info['publisher']
					game_info.developer = temp_game_info['developer']
					game_info.rating = temp_game_info['rating']
				else:
					#this flag is what the 'flags like "%no_match%"' cleanup above looks for on a later run
					game_info.flags = 'no_match,'
				
				#if name contains brackets [] with a minus '-' inside, glob will error out
				#NOTE(review): the image lookup is driven by exceptions - an empty glob result raises
				#IndexError on [0], a missing match raises TypeError on best_match_game[3], and a failed
				#first glob leaves temp_image_path unbound; the bare excepts turn all of these into the
				#next fallback (and would also hide unrelated errors)
				if dont_match == False:
					try:
						#prefer (user added) image, named same as rom + any extension
						temp_image_path = glob.glob( os.path.join( os.path.join(platform['rom_path'], 'images/'), os.path.splitext(rom)[0] ) + '.*')
						game_info.image_file = temp_image_path[0]
					except:
						try:
							#if no rom named image, then find title named image
							if not game_info.image_file:
							
								image_search = self.GC.execute('SELECT image_file FROM image_match WHERE system=? and id=?', (best_match_game[3], best_match_game[0])).fetchone()[0]
								if image_search: image_search =  [os.path.join(platform['rom_path'], 'images/') + image_search + '.*',
																				os.path.join( os.path.join(platform['rom_path'], 'images/'), self.strip_accents(temp_game_info['title']) + '.*')]
								for image in image_search:
									temp_image_path.extend( glob.glob( image ) )
								game_info.image_file = temp_image_path[0]
						except:
							#if no image found, default to rom name with no extension. Boxart thread in romlistscene will try both .jpg and .png extensions
							game_info.image_file = os.path.join( os.path.join(platform['rom_path'], 'images/'), os.path.splitext(rom)[0] )
				else:
					game_info.image_file = os.path.join( os.path.join(platform['rom_path'], 'images/'), os.path.splitext(rom)[0] )
				
				
				#one tuple per rom, in the column order of the INSERT below (21 values)
				rom_list_append((game_info.id, game_info.system,
										game_info.title, game_info.search_terms,
										None, None, #parent, cloneof -> for arcade
										game_info.release_date, game_info.overview,
										game_info.esrb, game_info.genres,
										game_info.players, game_info.coop,
										game_info.publisher, game_info.developer,
										game_info.rating, game_info.command,
										game_info.rom_file, game_info.rom_path,
										game_info.image_file, 0, game_info.flags))
			
			#('?,' * 21)[:-1] builds 21 comma separated placeholders, one per listed column
			self.LC.executemany('INSERT INTO local_roms '  + 
				'(id, system, title, search_terms, parent, cloneof, release_date, overview, esrb, genres, ' +
				'players, coop, publisher, developer, rating, command, rom_file, rom_path, image_file, number_of_runs, flags) ' +
				'VALUES (' + ('?,' * 21)[:-1] + ')', rom_list)
			self.LOCAL.commit()
			print