Exemple #1
0
class TestCoder(unittest.TestCase):
    """Round-trip test of ``Coder`` over the fix data stored for a checker.

    ``setUpClass`` opens the database and defines ``checkerList``;
    ``setUp`` picks the checker at ``checkerIndex`` and loads its records;
    ``tearDown`` advances ``checkerIndex`` so the next test method uses the
    next entry of ``checkerList``.
    """

    @classmethod
    def setUpClass(cls):
        """Create the resources shared by all test methods of this class."""
        print("Starting up...")
        cls.db = CFDatabase(config.getCfDbFile())
        cls.checkers = Checkers()
        cls.checkerList = ['deadcode.DeadStores']
        cls.checkerIndex = 0

    def setUp(self):
        """Build the coder for the current checker and load its records."""
        # Init coder
        print("Initializing coder...")
        self.checker = self.checkerList[self.checkerIndex]
        self.dictionary = Dictionary(self.checker)
        self.coder = Coder(self.dictionary)
        # Load all data from DB
        print("Fetching data from database...")
        self.allData = self.db.getFixDataForChecker(self.checker)
        self.allDataLen = len(self.allData)
        print("Done, fetched {0} records".format(self.allDataLen))

    def tearDown(self):
        """Move on to the next checker for the next test method."""
        # The counter lives on the class. `self.checkerIndex += 1` would only
        # create an instance attribute on this (discarded) test instance and
        # the class-level index would stay at 0 forever.
        type(self).checkerIndex += 1

    def testDeadcodeDeadStores(self):
        """Encode every record and check that decoding restores the text.

        Records for which ``encode`` yields a ``-1`` token are reported and
        skipped; all others must decode back to the stored bug and fix text.
        """
        self.assertGreater(self.allDataLen, 0, msg="No data found")

        # Encode all data
        print("Testing encoding")
        for recordNo, record in enumerate(self.allData, start=1):
            # Column 1 is encoded as the bug text, column 2 as the fix text;
            # column 4 is what the checker token extractor is fed with.
            bugText, fixText, checkerInput = record[1], record[2], record[4]

            checkerInfo = self.checkers.extractTokensForChecker(
                self.checker, checkerInput)
            encodedBugData, initialUnkList = self.coder.encode(
                bugText, checkerData=checkerInfo)
            encodedFixData, finalUnkList = self.coder.encode(
                fixText, unkList=initialUnkList, reverse=False)

            reportArgs = (recordNo, len(finalUnkList), len(encodedBugData),
                          len(encodedFixData))
            if -1 in encodedBugData:
                print(
                    "{0}: [{2} - {3} ({1})] Some tokens were not parsed (bug), ignoring (lenUnk = {1})"
                    .format(*reportArgs))
                continue
            if -1 in encodedFixData:
                print(
                    "{0}: [{2} - {3} ({1})] Some tokens were not parsed (fix), ignoring (lenUnk = {1})"
                    .format(*reportArgs))
                continue

            print("{0}: [{2} - {3} ({1})] Done (lenUnk = {1})".format(
                *reportArgs))
            textBug = self.coder.decode(encodedBugData, finalUnkList, True)
            textFix = self.coder.decode(encodedFixData, finalUnkList)
            self.assertEqual(textBug, bugText)
            self.assertEqual(textFix, fixText)

        print("All done.")
Exemple #2
0
def inference(args):
    """Run the trained model over a dataset and write the consensus as FASTA.

    For every predicted position the decoded base is counted as one vote;
    afterwards each contig is rebuilt from the most voted base per position,
    with the untouched head and tail of the original contig kept around it.

    Args:
        args: object providing ``model_path``, ``data_path``, ``batch_size``,
            ``num_workers`` and ``out_path``.

    Raises:
        IndexError: if a contig has no voted position whose second component
            is 0 (nothing is left after trimming the leading positions).
    """
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    model = RNN.load_from_checkpoint(args.model_path).to(device)
    # Inference only: put dropout/normalisation layers into evaluation mode.
    model.eval()

    dataset = InferenceDataset(args.data_path)
    dataloader = DataLoader(dataset, args.batch_size, num_workers=args.num_workers)

    # contig -> position tuple -> Counter of decoded bases
    result = defaultdict(lambda: defaultdict(Counter))

    print('>> started inference')
    # No gradients are needed, so skip building the autograd graph.
    with torch.no_grad():
        for contig, position, X in dataloader:
            X = X.to(device=device, dtype=torch.long)

            output = model(X)
            Y = torch.argmax(output, dim=2).long().cpu().numpy()

            for c, pos, ys in zip(contig, position, Y):
                for p, y in zip(pos, ys):
                    base = Coder.decode(y)

                    current_position = (p[0].item(), p[1].item())
                    result[c][current_position][base] += 1

    print('>> started processing of results')
    contigs = dataset.contigs
    records = []
    for contig, values in result.items():
        # Trim leading positions until the first one whose second component
        # is 0 (presumably an insertion offset - TODO confirm).
        sorted_positions = list(
            itertools.dropwhile(lambda x: x[1] != 0, sorted(values)))

        first = sorted_positions[0][0]
        last_position = sorted_positions[-1][0]
        original = contigs[contig][0]

        # Majority vote per position; positions voted as a gap emit nothing.
        winners = (values[p].most_common(1)[0][0] for p in sorted_positions)
        polished = ''.join(base for base in winners if base != Coder.GAP)

        seq = original[:first] + polished + original[last_position + 1:]

        records.append(SeqRecord.SeqRecord(Seq(seq), id=contig))

    with open(args.out_path, 'w') as f:
        SeqIO.write(records, f, 'fasta')
Exemple #3
0
class Predictor():
    """Interactive tool that predicts a fix for stored bugs of one checker.

    Bug data comes from the CodeChecker database, source text from the
    version control provider, and the fix from a model loaded per checker.
    """

    def __init__(self):
        """Create the repository, database and analyser helpers."""
        self.vcs = GitProvider(config.getRepoDir())
        self.ccdb = CCDatabase(config.getCcDbFile())
        self.codeChecker = CodeChecker(config.getRepoDir())
        self.checkers = Checkers()
        self.loadCommitList()

    def loadCommitList(self):
        """Load all versions of the configured branch and select the first."""
        self.commits = self.vcs.getAllVersions(config.getBranch())
        self.currentCommitIndex = 0

    def convertFilePathToRepoRelativePath(self, path):
        """Return ``path`` expressed relative to the repository directory."""
        repoDir = config.getRepoDir()
        return os.path.relpath(path, repoDir)

    def getDiffResolvedIds(self):
        """Return the ``reportId`` of every bug reported as resolved."""
        resolved = self.codeChecker.diffResolved(config.getCcRunName(),
                                                 config.getTmpDir(), self.ccdb)
        return [bug['reportId'] for bug in resolved]

    def predict(self, id, checker):
        """Predict fixes for one bug, or dry-run over all bugs of a checker.

        Args:
            id: bug id to predict a fix for. With ``-1`` every bug of
                ``checker`` is visited, but only a "too big" / "good to go"
                line is printed for each one and nothing is predicted.
            checker: checker name; selects the model and the dictionary.

        For a single bug the suggested fix is printed, optionally verified
        by re-running the analysis (``config.cfVerifyPrediction``), and the
        user is asked whether to keep it.
        """
        # Load all bugs
        print("Loading bug data...")
        if id == -1:
            bugIds = [row[0] for row in self.ccdb.getAllBugsForChecker(checker)]
        else:
            bugIds = [id]

        # Loading model
        print("Loading model...")
        model = load_model(config.cfModelFilenameFormat.format(checker))
        model.summary()
        verificationLabels = ('NOT OK', 'OK', 'Skipped')

        # Initialize coder
        print("Initializing coder...")
        self.dictionary = Dictionary(checker)
        self.coder = Coder(self.dictionary)
        self.totalDictionaryLength = self.dictionary.length()

        # Predicting
        print("Starting predictions...")
        for bugId in bugIds:
            bugData = self.ccdb.getBugData(bugId)
            bugChecker = bugData.getChecker()
            isSupported = (bugChecker in globals.availableCheckers
                           and bugChecker == checker)
            if not isSupported:
                print("Bug #{0} - checker not supported".format(bugId))
                continue

            # Extra tokens taken from the checker message
            checkerInfo = self.checkers.extractTokensForChecker(
                bugChecker, bugData.getMessage())

            # Cut the buggy fragment out of the file at the current commit
            relativePath = self.convertFilePathToRepoRelativePath(
                bugData.getFile())
            fileText = self.vcs.getFileContents(
                relativePath, self.commits[self.currentCommitIndex])
            extractor = CodeExtractor(bugData)
            extractor.loadCodeFromText(fileText)
            extractor.extractBugCode()
            bugCodeFragment = extractor.getBugCodeFragment()

            # Encode it
            encodedBugData, initialUnkList = self.coder.encode(
                bugCodeFragment, checkerData=checkerInfo)

            # The first layer's input shape fixes the sequence length
            maxLen = model.get_layer(index=0).input_shape[1]
            encodedLen = len(encodedBugData)
            if encodedLen > maxLen:
                print("Bug #{0} - Code too big for model, ignored".format(bugId))
                continue
            if id == -1:
                # Dry run over all bugs: report only, no prediction
                print("Bug #{0} - Good to go".format(bugId))
                continue

            # Pad up to the model length and convert to one-hot
            missing = maxLen - encodedLen
            if missing > 0:
                encodedBugData = self.coder.applyPadding(encodedBugData,
                                                         missing)
            X = np.zeros((1, maxLen, self.totalDictionaryLength))
            X[0] = self.coder.convertToOneHot(
                encodedBugData, np.zeros((maxLen, self.totalDictionaryLength)))

            # Predict and convert from one-hot
            Y = self.coder.convertFromOneHot(model.predict(X)[0])
            print(Y)

            # Decode
            Y = self.coder.removePadding(Y)
            fixCodeFragment = self.coder.decode(Y, initialUnkList)

            # Verify? Index into verificationLabels; 2 means "Skipped"
            vStatus = 2
            if config.cfVerifyPrediction:
                # Write the fix to disk, re-run the analysis and check
                # whether this bug id is now listed as resolved
                extractor.applyFix(fixCodeFragment)
                extractor.saveToFile(bugData.getFile())
                self.codeChecker.check(True)
                vStatus = 1 if bugId in self.getDiffResolvedIds() else 0

            # Print
            print("Bug #{0} - summary".format(bugId))
            print("== Code fragment with bug ==")
            print(bugCodeFragment)
            print("== Suggested fix ==")
            print(fixCodeFragment)
            print("Verification: {0}".format(verificationLabels[vStatus]))

            answer = ''
            while answer not in ('y', 'n'):
                answer = input("Apply fix? (y/n): ")

            if answer == 'y':
                # With verification on, the fix is already on disk
                if not config.cfVerifyPrediction:
                    extractor.applyFix(fixCodeFragment)
                    extractor.saveToFile(bugData.getFile())
            elif config.cfVerifyPrediction:
                # Rejected after verification wrote it: revert file contents
                self.vcs.checkout(self.commits[self.currentCommitIndex])
            print('Done')
        print("All done, exiting...")
Exemple #4
0
            v_new = s.add_transit(new_state=to_layer + from_layer,
                                  gates=v_old,
                                  op1=to_layer,
                                  op2=from_layer)

    print(c.list_tokens())

    weights, biases, _, residual = s.flash()
    for k in weights:
        w, b = weights[k], biases[k]
        print(k)
        print(w)
        print(b.T)

    a = {"gates": v_old, "op1": c.encode("SC"), "op2": c.encode("SC")}
    wvb = np.zeros(v_old.shape)
    for k in weights:
        w, b = weights[k], biases[k]
        wvb += w.dot(a[k[1]]) + b
    z = np.zeros(v_old.shape)
    a = {"gates": act.f(wvb), "op1": z, "op2": z}
    wvb = np.zeros(v_old.shape)
    for k in weights:
        w, b = weights[k], biases[k]
        wvb += w.dot(a[k[1]]) + b
    v_test = act.f(wvb)

    for v in [v_old, v_test, v_new]:
        print(c.decode(v), v.T)
    print(act.e(v_test, v_new).T)
Exemple #5
0
    def main(self):
        """Analyse the code, suggest fixes for new bugs and ask how to proceed.

        Runs the analysis, collects newly introduced bugs, predicts a fix for
        each with the model of its checker, optionally verifies each fix by
        re-running the analysis, then interactively asks whether to apply the
        suggestions and whether to go on with the commit.

        Returns:
            None. Returns early when no new bugs were found.

        Raises:
            SystemExit: with status 1 when the user aborts with ``q`` or when
                standard input is exhausted while a choice is awaited.
        """
        import os  # local import: only needed for the path handling below

        def resetTmpDir():
            """Remove the temp dir if present (it may not exist yet)."""
            tmpDir = config.getTmpDir()
            if os.path.isdir(tmpDir):
                shutil.rmtree(tmpDir)

        def readKey():
            """Read one character from stdin; abort the commit at EOF."""
            key = sys.stdin.read(1)
            if key == '':
                # read(1) keeps returning '' once stdin is exhausted, so
                # without this check the prompt loops below never terminate.
                print("No input available, aborting commit...")
                sys.exit(1)
            return key

        # Do analysis
        resetTmpDir()
        self.codeChecker.check(True)

        # Diff new
        newBugs = self.getDiffNew()

        if len(newBugs) < 1:
            print('No new bugs introduced, commit is accepted!')
            return

        print("New bugs found! Count: {0}. Attempting repairs...".format(len(newBugs)))

        # Load models
        models = {
            checker: load_model(config.cfModelFilenameFormat.format(checker))
            for checker in globals.availableCheckers
        }

        # Group the new bugs by repo-relative file path (computed once per bug)
        bugsByFile = {}
        for bug in newBugs:
            relPath = self.convertFilePathToRepoRelativePath(bug.getFile())
            bugsByFile.setdefault(relPath, []).append(bug)
        files = set(bugsByFile)

        # Load all content from files having new bugs
        fileContents = {}
        for f in files:
            # join() copes with a repo dir given with or without a trailing
            # separator; plain string concatenation does not.
            fn = os.path.join(config.getRepoDir(), f)
            with open(fn, 'r') as fh:
                fileContents[f] = fh.read()

        verdictMessages = {
            0: "Verification: Negative, cannot be applied",
            1: "Verification: Positive, can be applied",
            2: "Verification: Skipped",
        }

        # For each file sort by bug line desc
        suggestions = []
        validSuggestions = 0
        for f in files:
            bugs = sorted(bugsByFile[f], key=lambda x: x.getLine(), reverse=True)
            print("=== File: {0} ===".format(f))
            # For each bug get a suggestion and test it
            for b in bugs:
                checkerName = b.getChecker()
                print("L{0}, Type: {1}".format(b.getLine(), checkerName))
                # Prepare useful data
                dictionary = Dictionary(checkerName)
                coder = Coder(dictionary)
                totalDictionaryLength = dictionary.length()
                model = models[checkerName]
                # Prepare and extract bug fragment
                checkerInfo = self.checkers.extractTokensForChecker(checkerName, b.getMessage())
                extractor = CodeExtractor(b)
                extractor.loadCodeFromText(fileContents[f])
                extractor.extractBugCode()
                bugCodeFragment = extractor.getBugCodeFragment()
                # Encode it
                encodedBugData, initialUnkList = coder.encode(bugCodeFragment, checkerData=checkerInfo)
                # The first layer's input shape fixes the sequence length
                MODEL_X_MAX_LEN = model.get_layer(index=0).input_shape[1]

                if len(encodedBugData) > MODEL_X_MAX_LEN:
                    print("Ignored: Code too big for model")
                    continue

                # Pad up to the model length and convert to one-hot
                noZerosToPad = MODEL_X_MAX_LEN - len(encodedBugData)
                if noZerosToPad > 0:
                    encodedBugData = coder.applyPadding(encodedBugData, noZerosToPad)
                X = np.zeros((1, MODEL_X_MAX_LEN, totalDictionaryLength))
                X[0] = coder.convertToOneHot(encodedBugData, np.zeros((MODEL_X_MAX_LEN, totalDictionaryLength)))
                # Predict and convert from one-hot
                Y = coder.convertFromOneHot(model.predict(X)[0])
                Y = coder.removePadding(Y)
                # Decode (the last character of the decoded text is dropped)
                fixCodeFragment = coder.decode(Y, initialUnkList)[:-1]

                # Verify? 0 = rejected, 1 = confirmed, 2 = verification skipped
                vStatus = 2
                if config.cfVerifyPrediction:
                    # Apply fix in source code file
                    extractor.applyFix(fixCodeFragment)
                    try:
                        extractor.saveToFile(b.getFile())
                        # Run CodeChecker and analyze code
                        resetTmpDir()
                        compilationLog = self.codeChecker.check(True)
                        newBugsAfterFix = self.getDiffNew()
                    finally:
                        # Revert file, even if the analysis blew up, so an
                        # unverified fix never stays in the working tree
                        extractor.loadCodeFromText(fileContents[f])
                        extractor.saveToFile(b.getFile())
                    # Fixed only if the build still works and the bug is gone
                    isFixed = ('Build failed' not in compilationLog
                               and not any(self.isBugDataEqual(b, nb)
                                           for nb in newBugsAfterFix))
                    vStatus = 1 if isFixed else 0

                print(verdictMessages[vStatus])
                if vStatus != 0:
                    validSuggestions += 1
                sugg = SuggestionData(f, b, bugCodeFragment, fixCodeFragment, vStatus)
                suggestions.append(sugg)
        print("Valid suggestions prepared for {0} / {1} bugs.".format(validSuggestions, len(newBugs)))

        if validSuggestions > 0:
            print("Apply valid suggestions (a), display them (d), ignore them (i) or abort commit (q)?")
            applyFixes = False
            while True:
                c = readKey()
                if c == 'a':
                    applyFixes = True
                    print("Applying fixes...")
                    break
                elif c == 'i':
                    print("Fixes ignored...")
                    break
                elif c == 'd':
                    self.displaySuggestions(suggestions)
                    print("Apply valid suggestions (a), ignore them (i) or abort commit (q)?")
                elif c == 'q':
                    print("Aborting commit...")
                    sys.exit(1)
            if applyFixes:
                self.applyValidFixes(suggestions, files)
                print("Fixes applied!")
        if validSuggestions != len(newBugs):
            print("Unable to fix all bugs, continue with commit (c) or abort (q)?")
            while True:
                c = readKey()
                if c == 'c':
                    print("Continuing...")
                    break
                elif c == 'q':
                    print("Aborting commit...")
                    sys.exit(1)
        else:
            print("Bugs corrected, commit is good to go!")
Exemple #6
0
from coder import Coder, MorseCoder
import string

if __name__ == "__main__":
    # By default, the translator will encode files by switching them to uppercase
    translator = Coder(string.ascii_lowercase, string.ascii_uppercase)

    # raw_input only exists on Python 2; on Python 3 the same job is done by
    # input, so pick whichever is available and keep the script runnable on both.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    # Echo every input line encoded, then decoded back again.
    while True:
        try:
            line = read_line()
        except EOFError:
            # End of input (Ctrl-D or exhausted pipe): stop cleanly instead
            # of terminating with a traceback.
            break
        coded = translator.encode(line)
        print(coded)
        print(translator.decode(coded))