def language_percentage(file_paths):
    """
    Computes the percentage composition of each language found in the
    given files.

    A file's languages are looked up from its extension in ``exts``. When
    the extension is not listed there, the file's hashbang (if any) is used
    instead. Files whose language cannot be determined either way are
    skipped and do not contribute to any entry.

    :param file_paths: A list of file paths.
    :return:           A ``defaultdict`` with language name as key and the
                       percentage of files using that language as the
                       value. A file whose extension maps to several
                       languages counts towards each of them. Languages
                       found via extension are keyed as spelled in
                       ``exts``; languages found via hashbang are keyed in
                       lower case.
    """
    results = defaultdict(int)
    if not file_paths:
        return results

    # Every file accounts for the same share of the total.
    delta = 100 / len(file_paths)

    # Lower-cased names of every language listed in ``exts``. Using a set
    # means a language listed under several extensions is still credited
    # only once per hashbang file instead of once per listing.
    known_languages = {
        lang.lower()
        for langs in exts.values()
        for lang in langs
    }

    for file_path in file_paths:
        ext = os.path.splitext(file_path)[1]

        if ext in exts:
            for lang in exts[ext]:
                results[lang] += delta
            continue

        # Unknown extension: fall back to the interpreter named in the
        # hashbang, if the file has one.
        hashbang = get_hashbang(file_path)
        if not hashbang:
            continue

        language = get_language_from_hashbang(hashbang).lower()
        if language in known_languages:
            results[language] += delta

    return results
def test_no_eol(self):
    # A hashbang that is not terminated by a newline is still returned.
    import os  # local import: only needed to remove the temporary file

    with NamedTemporaryFile(mode='w+t', delete=False) as temp_file:
        # ``delete=False`` keeps the file on disk after it is closed so it
        # can be reopened by name; remove it when the test ends, whether it
        # passes or fails.
        self.addCleanup(os.remove, temp_file.name)
        temp_file.write('#!bin/bash')

    # Leaving the ``with`` block has closed (and flushed) the file.
    self.assertEqual(get_hashbang(temp_file.name), '#!bin/bash')
def test_missing_file(self):
    # A path that does not exist yields ``None``.
    missing_path = 'does_not_exist'
    hashbang = get_hashbang(missing_path)
    self.assertIsNone(hashbang)
def test_no_hash(self):
    # A first line that lacks the leading ``#`` is not a hashbang.
    import os  # local import: only needed to remove the temporary file

    with NamedTemporaryFile(mode='w+t', delete=False) as temp_file:
        # ``delete=False`` keeps the file on disk after it is closed so it
        # can be reopened by name; remove it when the test ends, whether it
        # passes or fails.
        self.addCleanup(os.remove, temp_file.name)
        temp_file.write('!bin/bash')

    # Leaving the ``with`` block has closed (and flushed) the file.
    self.assertIsNone(get_hashbang(temp_file.name))
def test_non_unicode_file(self):
    # A file whose content is not valid UTF-8 yields ``None``.
    import os  # local import: only needed to remove the temporary file

    with NamedTemporaryFile(mode='w+b', delete=False) as temp_file:
        # ``delete=False`` keeps the file on disk after it is closed so it
        # can be reopened by name; remove it when the test ends, whether it
        # passes or fails.
        self.addCleanup(os.remove, temp_file.name)
        # ``\200`` is an octal escape for the single byte 0x80, which cannot
        # start a UTF-8 sequence; the ``0x80`` after it is literal ASCII.
        temp_file.write(b'\2000x80')

    # Leaving the ``with`` block has closed (and flushed) the file.
    self.assertIsNone(get_hashbang(temp_file.name))