def test_empty_mod_type_annot_cove(self):
    empty_mod = Extractor().extract(open('./examples/__init__.py', 'r').read())
    self.assertEqual(1.0, empty_mod.type_annot_cove)
Example #2
    def process_project(self, i, project):

        project_id = f'{project["author"]}/{project["repo"]}'
        project_analyzed_files: dict = {
            project_id: {
                "src_files": {},
                "type_annot_cove": 0.0
            }
        }
        try:
            print(f'Running pipeline for project {i} {project_id}')
            project['files'] = []

            print(f'Extracting for {project_id}...')
            extracted_avl_types = None

            project_files = list_files(
                join(self.projects_path, project["author"], project["repo"]))
            print(
                f"{project_id} has {len(project_files)} files before deduplication"
            )
            project_files = [
                f for f in project_files if not self.is_file_duplicate(f)
            ]
            print(
                f"{project_id} has {len(project_files)} files after deduplication"
            )

            project_files = [
                (f, str(Path(f).relative_to(Path(self.projects_path).parent)))
                for f in project_files
            ]
            project_files = [(f, f_r, self.split_dataset_files[f_r]
                              if f_r in self.split_dataset_files else None)
                             for f, f_r in project_files]

            if len(project_files) != 0:
                if self.use_pyre:
                    print(f"Running pyre for {project_id}")
                    clean_pyre_config(
                        join(self.projects_path, project["author"],
                             project["repo"]))
                    pyre_server_init(
                        join(self.projects_path, project["author"],
                             project["repo"]))

                for filename, f_relative, f_split in project_files:
                    try:
                        pyre_data_file = pyre_query_types(
                            join(self.projects_path, project["author"],
                                 project["repo"]),
                            filename) if self.use_pyre else None

                        project_analyzed_files[project_id]["src_files"][f_relative] = \
                            self.apply_nlp_transf(
                                Extractor().extract(read_file(filename), pyre_data_file).to_dict()) if self.nlp_transf \
                                else Extractor().extract(read_file(filename), pyre_data_file).to_dict()

                        project_analyzed_files[project_id]["src_files"][
                            f_relative]['set'] = f_split
                        if self.use_tc:
                            print(f"Running type checker for file: {filename}")
                            project_analyzed_files[project_id]["src_files"][f_relative]['tc'] = \
                                type_check_single_file(filename, self.tc)

                        extracted_avl_types = project_analyzed_files[project_id]["src_files"][f_relative]['imports'] + \
                                              [c['name'] for c in
                                               project_analyzed_files[project_id]["src_files"][f_relative]['classes']]
                    except ParseError as err:
                        # print(f"Could not parse file {filename}")
                        traceback.print_exc()
                        self.logger.error(
                            "project: %s |file: %s |Exception: %s" %
                            (project_id, filename, err))
                    except UnicodeDecodeError:
                        print(f"Could not read file {filename}")
                    except Exception as err:
                        # Other unexpected exceptions; Failure of single file should not
                        # fail the entire project processing.
                        # TODO: A better workaround would be to have a specialized exception thrown
                        # by the extractor, so that this exception is specialized.
                        #print(f"Could not process file {filename}")
                        traceback.print_exc()
                        self.logger.error(
                            "project: %s |file: %s |Exception: %s" %
                            (project_id, filename, err))
                        #logging.error("project: %s |file: %s |Exception: %s" % (project_id, filename, err))

                print(f'Saving available type hints for {project_id}...')
                if self.avl_types_dir is not None:
                    if extracted_avl_types:
                        with open(
                                join(
                                    self.avl_types_dir,
                                    f'{project["author"]}_{project["repo"]}_avltypes.txt'
                                ), 'w') as f:
                            for t in extracted_avl_types:
                                f.write("%s\n" % t)

                if len(project_analyzed_files[project_id]
                       ["src_files"].keys()) != 0:
                    project_analyzed_files[project_id]["type_annot_cove"] = \
                        round(sum([project_analyzed_files[project_id]["src_files"][s]["type_annot_cove"] for s in
                                   project_analyzed_files[project_id]["src_files"].keys()]) / len(
                            project_analyzed_files[project_id]["src_files"].keys()), 2)

                    save_json(self.get_project_filename(project),
                              project_analyzed_files)

                if self.use_pyre:
                    pyre_server_shutdown(
                        join(self.projects_path, project["author"],
                             project["repo"]))

            else:
                raise NullProjectException(project_id)

        except KeyboardInterrupt:
            quit(1)
        except NullProjectException as err:
            self.logger.error(err)
            print(err)
        except Exception as err:
            print(f'Running pipeline for project {i} failed')
            traceback.print_exc()
            self.logger.error("project: %s | Exception: %s" %
                              (project_id, err))
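For reference, below is a minimal, self-contained sketch of the project-level coverage reduction performed near the end of process_project: the project's "type_annot_cove" is the mean of the per-file coverages stored under project_analyzed_files[project_id]["src_files"], rounded to two decimals. The helper name and the toy data are hypothetical and only illustrate the arithmetic.

# Hypothetical helper mirroring the coverage reduction in process_project:
# the project coverage is the mean of the per-file "type_annot_cove" values,
# rounded to two decimals. Files that failed extraction are simply absent.
from typing import Dict


def project_type_annot_cove(src_files: Dict[str, dict]) -> float:
    """Average the per-file 'type_annot_cove' values of a project."""
    if not src_files:
        return 0.0
    total = sum(f["type_annot_cove"] for f in src_files.values())
    return round(total / len(src_files), 2)


# Toy example: one fully annotated file and one half-annotated file -> 0.75
print(project_type_annot_cove({
    "pkg/a.py": {"type_annot_cove": 1.0},
    "pkg/b.py": {"type_annot_cove": 0.5},
}))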
Example #3
@classmethod
def setUpClass(cls):
    cls.processed_f = Extractor().extract(
        open('examples/qualified_types.py', 'r').read()).to_dict()
Example #4
from libsa4py.cst_extractor import Extractor
from libsa4py.representations import FunctionInfo, ModuleInfo, create_output_seq, validate_output_seq
from libsa4py.nl_preprocessing import normalize_module_code
from libsa4py.exceptions import OutputSequenceException
from libsa4py.utils import read_file
import unittest

processed_f = Extractor().extract(open('./examples/representations.py', 'r').read())


class TestModuleRepresentations(unittest.TestCase):
    """
    It tests the Dict-based representation of modules
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.maxDiff = None

    def test_mod_repr_dict_keys(self):
        mod_repr_dict_key_exp = ['untyped_seq', 'typed_seq', 'imports', 'variables', 'mod_var_occur', 'mod_var_ln',
                                 'classes', 'funcs', 'set', 'tc', 'no_types_annot', 'type_annot_cove']
        self.assertListEqual(mod_repr_dict_key_exp, list(processed_f.to_dict().keys()))

    def test_mod_repr_cls_dict(self):
        cls_repr_mod_exp = [{'name': 'MyClass', 'q_name': 'MyClass', 'cls_lc': ((12, 0), (23, 44)),
                             'variables': {'cls_var': 'builtins.int'},
                             'cls_var_occur': {'cls_var': [['MyClass', 'cls_var', 'c', 'n']]},
                             'cls_var_ln': {'cls_var': ((16, 4), (16, 11))},
                             'funcs': [{'name': '__init__', 'q_name': 'MyClass.__init__', 'fn_lc': ((18, 4), (19, 18)),
                                        'params': {'self': '', 'y': 'builtins.float'}, 'ret_exprs': [],
Example #5
@classmethod
def setUpClass(cls):
    cls.processed_f = Extractor().extract(open('./examples/assignments.py', 'r').read()).to_dict()
Example #6
@classmethod
def setUpClass(cls):
    cls.processed_f = Extractor().extract(
        open('./examples/different_fns.py', 'r').read()).to_dict()
Example #7
@classmethod
def setUpClass(cls):
    cls.processed_f = Extractor().extract(
        open('./examples/vars_args_occur.py', 'r').read()).to_dict()
Example #8
@classmethod
def setUpClass(cls):
    cls.extractor_out = Extractor().extract(
        read_file('./examples/vars_types_pyre.py'),
        load_json('./examples/vars_types_pyre_data.json'))
Example #9
@classmethod
def setUpClass(cls):
    cls.extractor_out = Extractor().extract(
        read_file('./examples/representations.py'))
    cls.extractor_out_wo_seq2seq = Extractor().extract(
        read_file('./examples/representations.py'), include_seq2seq=False)