Exemple #1
0
    def __init__(self, task, pov_replace_unk):
        """Prepare the scorer: smoothing function, WordNet data and code parser.

        Args:
            task: Object exposing ``args.data`` as a string. The string is
                split on ``':'`` and only the first entry is used; the final
                path component of that entry is taken as the dataset name.
            pov_replace_unk: Stored unchanged on the instance as
                ``self.pov_replace_unk`` (its meaning is not visible in this
                method).

        Raises:
            RuntimeError: If the dataset name starts with neither ``"java"``
                nor ``"python"``.
        """
        # Imported lazily so nltk is only needed when a scorer is built.
        import nltk
        from nltk.translate.bleu_score import SmoothingFunction

        smoothing = SmoothingFunction()
        self._smooth_fn = smoothing.method4
        nltk.download('wordnet', quiet=True)

        self.pov_replace_unk = pov_replace_unk

        first_data_entry = task.args.data.split(':')[0]
        dataset_name = PurePosixPath(first_data_entry).name

        is_java = dataset_name.startswith('java')
        is_python = dataset_name.startswith('python')

        # Guard clause: only the two dataset families below are handled.
        if not (is_java or is_python):
            raise RuntimeError(
                'code generation scorer requires only support Java and Python now, '
                'dataset name must be started with "java" or "python"')

        if is_java:
            # javalang is only required for Java datasets.
            import javalang
            self._javalang = javalang
            self._parse_method = self._parse_java
        else:
            self._javalang = None
            self._parse_method = self._parse_python

            # Temporary workaround flag for a bug in the original dataset.
            # NOTE: this attribute is only created for Python datasets.
            self._fix_python_orig_bug = dataset_name.endswith('orig')

        self.reset()
Exemple #2
0
def parseBed2Json(bedfile):
    """Convert a tab-separated BED file with a header row into a JSON file.

    The first line of ``bedfile`` is read as the tab-separated column names.
    Every following line is split on tabs and, when it has exactly as many
    fields as the header, turned into a ``{column: value}`` dictionary (all
    values stay strings). Rows with a different number of fields are skipped.
    The list of dictionaries is written with ``indent=4`` to a JSON file in
    the current working directory, named after the input file with a trailing
    ``.bed`` replaced by ``.json`` (or ``.json`` appended when the name does
    not end in ``.bed``).

    An empty input file produces a JSON file containing an empty list.

    Args:
        bedfile: Path to the input file.

    Returns:
        None. If the input file does not exist, a message is printed and no
        output file is written.
    """
    json_data = []
    try:
        # Resolve the path so the output name comes from the real file name.
        bed_name = Path(bedfile).resolve().name

        # The context manager closes the input even if parsing fails.
        with open(bedfile, 'r') as infile:
            header_line = infile.readline()
            # An empty file has no header and therefore no rows to emit.
            header = header_line.rstrip('\n').split("\t") if header_line else []

            for line in infile:
                fields = line.rstrip('\n').split("\t")
                # Keep only complete rows, i.e. same width as the header.
                if len(fields) == len(header):
                    json_data.append(dict(zip(header, fields)))
    except FileNotFoundError:
        # Only a missing *input* file is reported here; errors while writing
        # the output are left to propagate instead of being mislabelled.
        print("your file does not exist")
        return

    # Strip only a trailing ".bed"; a ".bed" inside the name is kept.
    bed_suffix = '.bed'
    if bed_name.endswith(bed_suffix):
        json_file = bed_name[:-len(bed_suffix)] + ".json"
    else:
        json_file = bed_name + ".json"

    with open(json_file, 'w') as outfile:
        json.dump(json_data, outfile, indent=4)