Example #1
def test_decode_msg_random_key():
    #     'abcdefghijklmnopqrstuvwxyz'
    key = '!)"(£*%&><@abcdefghijklmno'
    coder = Coder(key)
    plaintext = 'calendario'
    ciphertext = '"!a£c(!g>d'
    assert coder.decrypt(ciphertext) == plaintext
Example #2
    def build(self, checker):
        # Initialize coder
        print("Initializing coder...")
        self.dictionary = Dictionary(checker)
        self.coder = Coder(self.dictionary)

        # Load all data from DB
        print("Fetching data from database...")
        allData = self.db.getFixDataForChecker(checker)
        allDataLen = len(allData)
        print("Done, fetched {0} records".format(allDataLen))
        if allDataLen < 1:
            print("No data found")
            return

        # Encode all data
        print("Encoding all data and writing to output file...")
        i = 0
        (maxBug, maxFix,
         maxUnk) = self.checkers.getModelStatsForChecker(checker)
        with open(config.cfTrainFilenameFormat.format(checker), 'w') as f:
            while i < allDataLen:
                checkerInfo = self.checkers.extractTokensForChecker(
                    checker, allData[i][4])
                encodedBugData, initialUnkList = self.coder.encode(
                    allData[i][1], checkerData=checkerInfo)
                encodedFixData, finalUnkList = self.coder.encode(
                    allData[i][2], unkList=initialUnkList, reverse=False)
                if -1 in encodedBugData:
                    print(
                        "{0}: [{2} - {3} ({1})] Some tokens were not parsed (bug), ignoring (lenUnk = {1})"
                        .format(i + 1, len(finalUnkList), len(encodedBugData),
                                len(encodedFixData)))
                elif -1 in encodedFixData:
                    print(
                        "{0}: [{2} - {3} ({1})] Some tokens were not parsed (fix), ignoring (lenUnk = {1})"
                        .format(i + 1, len(finalUnkList), len(encodedBugData),
                                len(encodedFixData)))
                elif len(encodedBugData) > maxBug or len(
                        encodedFixData) > maxFix or len(finalUnkList) > maxUnk:
                    print(
                        "{0}: [{2} - {3} ({1})] Some tokens were not parsed (lengths), ignoring (lenUnk = {1})"
                        .format(i + 1, len(finalUnkList), len(encodedBugData),
                                len(encodedFixData)))
                else:
                    print("{0}: [{2} - {3} ({1})] Done (lenUnk = {1})".format(
                        i + 1, len(finalUnkList), len(encodedBugData),
                        len(encodedFixData)))
                    f.write(
                        json.dumps({
                            'x': encodedBugData,
                            'y': encodedFixData
                        }) + '\n')

                i += 1
                print('Done {0}'.format(i), file=sys.stderr)

        print("All done, exiting...")
Example #3
class TestCoder(unittest.TestCase):
    def setUp(self):
        # Init coder
        print("Initializing coder...")
        self.checker = self.checkerList[self.checkerIndex]
        self.dictionary = Dictionary(self.checker)
        self.coder = Coder(self.dictionary)
        # Load all data from DB
        print("Fetching data from database...")
        self.allData = self.db.getFixDataForChecker(self.checker)
        self.allDataLen = len(self.allData)
        print("Done, fetched {0} records".format(self.allDataLen))

    def tearDown(self):
        self.checkerIndex += 1

    @classmethod
    def setUpClass(self):
        print("Starting up...")
        self.db = CFDatabase(config.getCfDbFile())
        self.checkers = Checkers()
        self.checkerList = ['deadcode.DeadStores']
        self.checkerIndex = 0

    def testDeadcodeDeadStores(self):
        self.assertTrue(self.allDataLen > 0, msg="No data found")

        # Encode all data
        print("Testing encoding")
        i = 0
        while i < self.allDataLen:
            checkerInfo = self.checkers.extractTokensForChecker(
                self.checker, self.allData[i][4])
            encodedBugData, initialUnkList = self.coder.encode(
                self.allData[i][1], checkerData=checkerInfo)
            encodedFixData, finalUnkList = self.coder.encode(
                self.allData[i][2], unkList=initialUnkList, reverse=False)
            if -1 in encodedBugData:
                print(
                    "{0}: [{2} - {3} ({1})] Some tokens were not parsed (bug), ignoring (lenUnk = {1})"
                    .format(i + 1, len(finalUnkList), len(encodedBugData),
                            len(encodedFixData)))
            elif -1 in encodedFixData:
                print(
                    "{0}: [{2} - {3} ({1})] Some tokens were not parsed (fix), ignoring (lenUnk = {1})"
                    .format(i + 1, len(finalUnkList), len(encodedBugData),
                            len(encodedFixData)))
            else:
                print("{0}: [{2} - {3} ({1})] Done (lenUnk = {1})".format(
                    i + 1, len(finalUnkList), len(encodedBugData),
                    len(encodedFixData)))
                textBug = self.coder.decode(encodedBugData, finalUnkList, True)
                textFix = self.coder.decode(encodedFixData, finalUnkList)
                self.assertEqual(textBug, self.allData[i][1])
                self.assertEqual(textFix, self.allData[i][2])
            i += 1

        print("All done.")
Example #4
 def setUp(self):
     # Init coder
     print("Initializing coder...")
     self.checker = self.checkerList[self.checkerIndex]
     self.dictionary = Dictionary(self.checker)
     self.coder = Coder(self.dictionary)
     # Load all data from DB
     print("Fetching data from database...")
     self.allData = self.db.getFixDataForChecker(self.checker)
     self.allDataLen = len(self.allData)
     print("Done, fetched {0} records".format(self.allDataLen))
Example #5
 def encode_clicked(self, widget):
     enc = self.name_combo.get_active()
     if enc == 1:
         start = self.textbuffer.get_start_iter()
         end = self.textbuffer.get_end_iter()
         string = self.textbuffer.get_text(start, end, True)
         coder = Coder().url_encode(string)
         self.textbuffer_result.set_text(coder)
     elif enc == 0:
         start = self.textbuffer.get_start_iter()
         end = self.textbuffer.get_end_iter()
         string = self.textbuffer.get_text(start, end, True)
         coder = Coder().base64_encode(string)
         self.textbuffer_result.set_text(coder)
Example #6
    def __init__(self, N, mask_frac, stabil=10):
        self.stabil = stabil
        self.mask_frac = mask_frac

        size = N**2
        self.size = size
        self.mask_size = int(self.size / self.mask_frac)
        pad = 0.0001

        ### Create layers
        self.act = tanh_activator(pad, size)
        #self.act_mask = gate_activator(pad, size)
        #self.act_mask = logistic_activator(pad, size)
        self.act_mask = heaviside_activator(size)

        self.reg_layer, self.mem_layer, self.ptr_layer = (Layer(
            k, (N, N), self.act, Coder(self.act)) for k in "rmp")

        # Gating masks
        self.masks = {}
        self.w_mask = np.zeros((size, size))

        # Weight matrices
        self.w_mm = np.zeros((size, size))
        self.w_pm = np.zeros((size, size))
        self.w_mp = np.zeros((size, size))

        # Dummy bias to avoid extra memory allocation
        self.dummy_bias = np.zeros((size, 1))
Example #7
    def __init__(self,
                 layer_shape,
                 pad,
                 activator,
                 learning_rule,
                 register_names,
                 shapes={},
                 tokens=[],
                 orthogonal=False,
                 verbose=False):

        self.tokens = tokens
        self.orthogonal = orthogonal
        self.register_names = register_names
        # default registers
        layer_size = layer_shape[0] * layer_shape[1]
        act = activator(pad, layer_size)
        registers = {
            name: Layer(name, layer_shape, act, Coder(act))
            for name in register_names
        }
        self.net = NVMNet(layer_shape,
                          pad,
                          activator,
                          learning_rule,
                          registers,
                          shapes=shapes,
                          tokens=tokens,
                          orthogonal=orthogonal,
                          verbose=verbose)
Example #8
def get_postions_and_labels(align, ref, region):
    """
    Returns list of corresponding positions and labels.

    Parameters
    ----------
    align : align for which positions and labels are required
    ref : corresponding reference sequence
    region : corresponding region
    """

    start, end = region.start, region.end
    if start is None: start = 0
    if end is None: end = float('inf')
    start, end = max(start, align.start), min(end, align.end)

    positions = []
    labels = []

    pairs = get_pairs(align.align, ref)
    current_position = None
    insert_count = 0
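    # Positions are (reference_position, insert_count) pairs; insert_count > 0
    # marks inserted bases that have no reference coordinate of their own.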

    for pair in itertools.dropwhile(lambda p: (p.ref_position is None) or (p.ref_position < start), pairs):
        if pair.ref_position == align.align.reference_end or (pair.ref_position is not None and pair.ref_position >= end):
            break

        if pair.ref_position is None:
            insert_count += 1
        else:
            insert_count = 0
            current_position = pair.ref_position

        position = (current_position, insert_count)
        positions.append(position)

        label = pair.query_base.upper() if pair.query_base else Coder.GAP

        try:
            encoded_label = Coder.encode(label)
        except KeyError:
            encoded_label = Coder.encode(Coder.UNKNOWN)

        labels.append(encoded_label)

    return positions, labels
Example #9
def main(app):
    codes = create_codes(app)
    cc = Coder.create_from_codes(codes)

    for rel_path, code in cc.itercode():
        print "\n\n============ %s ============\n" % rel_path, code
        #if rel_path.endswith('.py'):
        #    print check(code, 'test.py')
    return (codes, cc)
Example #10
 def test_image_base64_encode_with_dataurl(self):
     image = 'images/One_black_Pixel.png'
     test_value = ('<img src="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgA'
                   'AAAEAAAABCAIAAACQd1PeAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjw'
                   'v8YQUAAAAJcEhZcwAADsMAAA7DAcdvqGQAAAAMSURBVBhXY2BgYAAAA'
                   'AQAAVzN/2kAAAAASUVORK5CYII=" >')
     dataurl = True
     self.assertEqual(Coder().image_base64_encode(image, dataurl),
                      test_value)
Example #11
def ric_encode(imgbuf, config):
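    # Encode the image into layers, then wrap them in ISO media boxes:
    # an "FTYP" brand box, an "ILOT" layer-offset table, and the layer data.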
    img = Image.open(StringIO(imgbuf))
    layers = Coder().encode(img, config)
    wrapped_layers, offsetTable = wrapper.wrapLayers(layers)
    output = ""
    output += iso_media.write_box("FTYP", "RIC ")
    output += iso_media.write_box("ILOT", write_layer_offsets(offsetTable))
    output += wrapped_layers
    return output
Example #12
    def _send_message(self):
        self.chat_id = input('Enter chat id:\n')
        if not self.chat_id:
            self.chat_id = LISTENER

        message = input('Enter message:\n')

        self._send_bits(START_MESSAGE)
        self._send_bits(Coder.encode(message))
        self._send_bits(END_MESSAGE)
Example #13
def main():
    print(_welcome)

    while True:
        mode = input('\nCommand: ')

        if mode == 'q':
            return

        output = ''
        msg = input('Encode: ' if mode == '0' else 'Decode: ')
        if mode == '0':
            output = Coder.msg_to_code(msg)
            print(' "{}" 已复制'.format(output))
        else:
            output = Coder.code_to_msg(msg)
            print(' "{}" 已复制'.format(output))
        os.system('echo "{}" | pbcopy'.format(output))
        print('')
Example #14
def main(variant):
    with open('variant', 'w') as f:
        f.write(variant)

    encoder = Coder(variant)
    paths = []
    chunk_num = 0
    max_chunk_num = 2
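    # The index is built in max_chunk_num passes: a word belongs to the current
    # chunk when len(word) % max_chunk_num == chunk_num, and each chunk's
    # postings list is compressed with the chosen Coder variant before pickling.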

    while True:
        tokens = {}
        i = 1
        if chunk_num == max_chunk_num:
            break

        documents = docreader.DocumentStreamReader(
            docreader.parse_command_line().files)
        for doc in documents:
            if chunk_num == 0:
                paths.append(doc.url)

            words = doc2words.extract_words(doc.text)

            for word in set(words):
                if word in tokens:
                    tokens[word].append(i)
                elif len(word) % max_chunk_num == chunk_num:
                    tokens[word] = array('l', [i])

            i += 1

        for token in tokens:
            tokens[token] = encoder.encode(tokens[token])

        with open('index{}.pkl'.format(chunk_num), 'wb') as f:
            pickle.dump(tokens, f)

        chunk_num += 1

    with open('paths.pkl', 'wb') as f:
        pickle.dump(paths, f)
Example #15
def inference(args):
    cuda_available = torch.cuda.is_available()
    model = RNN.load_from_checkpoint(args.model_path).to('cuda:0' if cuda_available else 'cpu')

    dataset = InferenceDataset(args.data_path)
    dataloader = DataLoader(dataset, args.batch_size, num_workers=args.num_workers)

    result = defaultdict(lambda: defaultdict(lambda: Counter()))
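    # result[contig][(position, insert_index)] holds a Counter of predicted
    # bases, so overlapping windows effectively vote on each position.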

    print('>> started inference')
    for batch in dataloader:
        contig, position, X = batch
        X = X.type(torch.cuda.LongTensor if cuda_available else torch.LongTensor)

        output = model(X)
        Y = torch.argmax(output, dim=2).long().cpu().numpy()

        for c, pos, ys in zip(contig, position, Y):
            for p, y in zip(pos, ys):
                base = Coder.decode(y)

                current_position = (p[0].item(), p[1].item())
                result[c][current_position][base] += 1

    print('>> started processing of results')
    contigs = dataset.contigs
    records = []
    for contig in result:
        values = result[contig]

        sorted_positions = sorted(values)
        sorted_positions = list(itertools.dropwhile(lambda x: x[1] != 0, sorted_positions))

        first = sorted_positions[0][0]
        contig_data = contigs[contig]
        seq = contig_data[0][:first]

        for _, p in enumerate(sorted_positions):
            base, _ = values[p].most_common(1)[0]
            if base == Coder.GAP: continue
            seq += base

        last_position = sorted_positions[-1][0]
        seq += contig_data[0][last_position+1:]

        seq = Seq(seq)
        record = SeqRecord.SeqRecord(seq, id=contig)
        records.append(record)

    with open(args.out_path, 'w') as f:
        SeqIO.write(records, f, 'fasta')
Example #16
def make_nvmnet(programs=None, registers=None):

    # default program
    if programs is None:
        programs = {
            "test":
            """
    
                mov d2 true
        loop:   mov d1 here
                jmp d1
        here:   mov d0 d2
                exit
    
        """
        }

    # set up activator
    activator, learning_rule = logistic_activator, hebbian
    # activator, learning_rule = tanh_activator, hebbian

    # make network
    layer_shape = (16, 16)
    layer_size = layer_shape[0] * layer_shape[1]
    pad = 0.01
    act = activator(pad, layer_size)

    # default devices
    # changing devices to registers
    if registers is None:
        registers = {
            "d%d" % d: Layer("d%d" % d, layer_shape, act, Coder(act))
            for d in range(3)
        }

    # assemble and link programs
    # changing devices to registers
    nvmnet = NVMNet(layer_shape, pad, activator, learning_rule, registers)
    for name, program in programs.items():
        nvmnet.assemble(program, name, verbose=1)
    nvmnet.link(verbose=1)

    # initialize pointer at last program
    nvmnet.activity["ip"] = nvmnet.layers["ip"].coder.encode(name)

    return nvmnet
Example #17
    def choose_image_clicked(self, widget):
        dialog = Gtk.FileChooserDialog(
            "Please choose an image", self, Gtk.FileChooserAction.OPEN,
            (Gtk.STOCK_CANCEL, Gtk.ResponseType.CANCEL, Gtk.STOCK_OPEN,
             Gtk.ResponseType.OK))

        self.add_filters(dialog)
        response = dialog.run()
        if response == Gtk.ResponseType.OK:
            filename = dialog.get_filename()
            coder = Coder().image_base64_encode(filename, self.dataurl)
            self.textbuffer_result.set_text(coder)
            self.textbuffer.set_text("Image encoded: " + filename)
        elif response == Gtk.ResponseType.CANCEL:
            self.textbuffer.set_text("Canceled")
        dialog.destroy()
Example #18
def test_digits(verbose=False):
    epochs = 1000
    pad = 0.0001
    feedback = True
    split_learn = True
    biases = True
    Ns = [256 for _ in range(3)]
    tokens = [str(x) for x in range(100)]

    net = CHL_Net(Ns, feedback, split_learn, biases)

    # Input/output pattern pairs (0-99)
    in_coder = Coder(tanh_activator(pad, (Ns[0])))
    out_coder = Coder(tanh_activator(pad, Ns[-1]))
    patterns = [(in_coder.encode(tok), out_coder.encode(tok))
                for tok in tokens]

    net.train(epochs, patterns, verbose=verbose)
Example #19
    def predict(self, id, checker):
        # Load all bugs
        print("Loading bug data...")
        ids = []
        if id == -1:
            bugs = self.ccdb.getAllBugsForChecker(checker)
            ids = [x[0] for x in bugs]
        else:
            ids.append(id)

        # Loading model
        print("Loading model...")
        model = load_model(config.cfModelFilenameFormat.format(checker))
        model.summary()
        vLabels = ['NOT OK', 'OK', 'Skipped']

        # Initialize coder
        print("Initializing coder...")
        self.dictionary = Dictionary(checker)
        self.coder = Coder(self.dictionary)
        self.totalDictionaryLength = self.dictionary.length()

        # Predicting
        print("Starting predictions...")
        for i in ids:
            allData = self.ccdb.getBugData(i)
            if (allData.getChecker() not in globals.availableCheckers
                    or allData.getChecker() != checker):
                print("Bug #{0} - checker not supported".format(i))
            else:
                # Load extra tokens from checker message
                checkerInfo = self.checkers.extractTokensForChecker(
                    allData.getChecker(), allData.getMessage())
                # Retrieve code fragment with bug
                fileRelativePath = self.convertFilePathToRepoRelativePath(
                    allData.getFile())
                fullCodeWithBug = self.vcs.getFileContents(
                    fileRelativePath, self.commits[self.currentCommitIndex])
                extractor = CodeExtractor(allData)
                extractor.loadCodeFromText(fullCodeWithBug)
                extractor.extractBugCode()
                bugCodeFragment = extractor.getBugCodeFragment()
                fixCodeFragment = ''
                # Encode it
                encodedBugData, initialUnkList = self.coder.encode(
                    bugCodeFragment, checkerData=checkerInfo)
                # Convert to one-hot
                MODEL_X_MAX_LEN = model.get_layer(index=0).input_shape[1]
                if len(encodedBugData) > MODEL_X_MAX_LEN:
                    print(
                        "Bug #{0} - Code too big for model, ignored".format(i))
                    continue
                elif id == -1:
                    print("Bug #{0} - Good to go".format(i))
                    continue
                noZerosToPad = MODEL_X_MAX_LEN - len(encodedBugData)
                if noZerosToPad > 0:
                    encodedBugData = self.coder.applyPadding(
                        encodedBugData, noZerosToPad)
                X = np.zeros((1, MODEL_X_MAX_LEN, self.totalDictionaryLength))
                X[0] = self.coder.convertToOneHot(
                    encodedBugData,
                    np.zeros((MODEL_X_MAX_LEN, self.totalDictionaryLength)))
                # Predict and convert from one-hot
                Y = self.coder.convertFromOneHot(model.predict(X)[0])
                print(Y)
                # Decode
                Y = self.coder.removePadding(Y)
                fixCodeFragment = self.coder.decode(Y, initialUnkList)

                #Verify?
                vStatus = 2
                if config.cfVerifyPrediction:
                    # Apply fix in source code file
                    extractor.applyFix(fixCodeFragment)
                    extractor.saveToFile(allData.getFile())
                    # Run CodeChecker and analyze code
                    self.codeChecker.check(True)
                    resolvedIds = self.getDiffResolvedIds()
                    # Check if ID is resolved in tmp folder
                    isFixed = i in resolvedIds
                    # Set vStatus accordingly
                    if isFixed:
                        vStatus = 1
                    else:
                        vStatus = 0
                #Print
                print("Bug #{0} - summary".format(i))
                print("== Code fragment with bug ==")
                print(bugCodeFragment)
                print("== Suggested fix ==")
                print(fixCodeFragment)
                print("Verification: {0}".format(vLabels[vStatus]))
                a = ' '
                while a != 'y' and a != 'n':
                    a = input("Apply fix? (y/n): ")
                if a == 'y':
                    if not config.cfVerifyPrediction:
                        # Apply fix in source code file
                        extractor.applyFix(fixCodeFragment)
                        extractor.saveToFile(allData.getFile())
                elif config.cfVerifyPrediction:
                    # Revert file contents
                    self.vcs.checkout(self.commits[self.currentCommitIndex])
                print('Done')
        print("All done, exiting...")
Example #20
def runCoderNormal(cfgFile):
    normalCoder = Coder(coderNormal)
    normalCoder.run(cfgFile)
Example #21
    return W, Z, residual


if __name__ == '__main__':

    np.set_printoptions(linewidth=200,
                        formatter={'float': lambda x: '% .2f' % x})

    N = 8
    PAD = 0.05

    from activator import *
    act = tanh_activator(PAD, N)
    # act = logistic_activator(PAD, N)

    c = Coder(act)

    g = Layer("gates", N, act, c)
    input_layers = {
        name: Layer(name, N, act, c)
        for name in ["gates", "op1", "op2"]
    }
    s = Sequencer(g, input_layers)
    v_old = g.coder.encode("SET")  # s.add_transit(new_state="SET")
    for to_layer in ["FEF", "SC"]:
        for from_layer in ["FEF", "SC"]:
            v_new = s.add_transit(new_state=to_layer + from_layer,
                                  gates=v_old,
                                  op1=to_layer,
                                  op2=from_layer)
Example #22
    def main(self):
        # Do analysis
        shutil.rmtree(config.getTmpDir())
        self.codeChecker.check(True)

        # Diff new
        newBugs = self.getDiffNew()

        if len(newBugs) < 1:
            print('No new bugs introduced, commit is accepted!')
            return
        
        print("New bugs found! Count: {0}. Attempting repairs...".format(len(newBugs)))

        # Load models
        models = {}
        for checker in globals.availableCheckers:
            models[checker] = load_model(config.cfModelFilenameFormat.format(checker))

        # Load all content from files having new
        files = set([self.convertFilePathToRepoRelativePath(x.getFile()) for x in newBugs])
        fileContents = {}
        for f in files:
            fn = config.getRepoDir() + f
            with open(fn, 'r') as fh:
                fileContents[f] = ''.join(fh.readlines())

        # For each file sort by bug line desc
        suggestions = []
        validSuggestions = 0
        for f in files:
            bugs = [x for x in newBugs if self.convertFilePathToRepoRelativePath(x.getFile()) == f]
            bugs.sort(key=lambda x: x.getLine(), reverse=True)
            print("=== File: {0} ===".format(f))
            # For each bug get a suggestion and test it
            for b in bugs:
                print("L{0}, Type: {1}".format(b.getLine(), b.getChecker()))
                # Prepare useful data
                dictionary = Dictionary(b.getChecker())
                coder = Coder(dictionary)
                totalDictionaryLength = dictionary.length()
                # Prepare and extract bug fragment
                checkerInfo = self.checkers.extractTokensForChecker(b.getChecker(), b.getMessage())
                extractor = CodeExtractor(b)
                extractor.loadCodeFromText(fileContents[f])
                extractor.extractBugCode()
                bugCodeFragment = extractor.getBugCodeFragment()
                fixCodeFragment = ''
                # Encode it
                encodedBugData, initialUnkList = coder.encode(bugCodeFragment, checkerData=checkerInfo)
                # Convert to one-hot
                MODEL_X_MAX_LEN = models[b.getChecker()].get_layer(index=0).input_shape[1]

                if len(encodedBugData) > MODEL_X_MAX_LEN:
                    print("Ignored: Code too big for model")
                    continue

                noZerosToPad = MODEL_X_MAX_LEN - len(encodedBugData)
                if noZerosToPad > 0:
                    encodedBugData = coder.applyPadding(encodedBugData, noZerosToPad)
                X = np.zeros((1, MODEL_X_MAX_LEN, totalDictionaryLength))
                X[0] = coder.convertToOneHot(encodedBugData, np.zeros((MODEL_X_MAX_LEN, totalDictionaryLength)))
                # Predict and convert from one-hot
                Y = coder.convertFromOneHot(models[b.getChecker()].predict(X)[0])
                Y = coder.removePadding(Y)
                # Decode
                fixCodeFragment = coder.decode(Y, initialUnkList)[:-1]
                
                #Verify?
                vStatus = 2
                if config.cfVerifyPrediction:
                    # Apply fix in source code file
                    extractor.applyFix(fixCodeFragment)
                    extractor.saveToFile(b.getFile())
                    # Run CodeChecker and analyze code
                    shutil.rmtree(config.getTmpDir())
                    compilationLog = self.codeChecker.check(True)
                    newBugsAfterFix = self.getDiffNew()
                    # Check if ID is resolved in tmp folder
                    isFixed = 'Build failed' not in compilationLog
                    for nb in newBugsAfterFix:
                        if self.isBugDataEqual(b, nb):
                            isFixed = False
                    # Set vStatus accordingly
                    if isFixed:
                        vStatus = 1
                    else:
                        vStatus = 0
                    # Revert file
                    extractor.loadCodeFromText(fileContents[f])
                    extractor.saveToFile(b.getFile())
                if vStatus == 0:
                    print("Verification: Negative, cannot be applied")
                elif vStatus == 1:
                    print("Verification: Positive, can be applied")
                    validSuggestions += 1
                elif vStatus == 2:
                    print("Verification: Skipped")
                    validSuggestions += 1
                sugg = SuggestionData(f, b, bugCodeFragment, fixCodeFragment, vStatus)
                suggestions.append(sugg)
        print("Valid suggestions prepared for {0} / {1} bugs.".format(validSuggestions, len(newBugs)))

        if validSuggestions > 0:
            print("Apply valid suggestions (a), display them (d), ignore them (i) or abort commit (q)?")
            apply = False
            choice = True
            while choice:
                c = sys.stdin.read(1)
                if c == 'a':
                    apply = True
                    choice = False
                    print("Applying fixes...")
                elif c == 'i':
                    choice = False
                    print("Fixes ignored...")
                elif c == 'd':
                    self.displaySuggestions(suggestions)
                    print("Apply valid suggestions (a), ignore them (i) or abort commit (q)?")
                elif c == 'q':
                    print("Aborting commit...")
                    sys.exit(1)
            if apply:
                self.applyValidFixes(suggestions, files)
                print("Fixes applied!")
        if validSuggestions != len(newBugs):
            print("Unable to fix all bugs, continue with commit (c) or abort (q)?")
            choice = True
            while choice:
                c = sys.stdin.read(1)
                if c == 'c':
                    choice = False
                    print("Continuing...")
                elif c == 'q':
                    print("Aborting commit...")
                    sys.exit(1)
        else:
            print("Bugs corrected, commit is good to go!")
Example #23
    def build(self, checker, startK, startBatch):
        # Initialize coder
        print("Initializing coder...")
        self.dictionary = Dictionary(checker)
        self.coder = Coder(self.dictionary)
        self.totalDictionaryLength = self.dictionary.length()  # + globals.firstAvailableToken

        # Load training data from file
        print("Loading training data...")
        data = []
        with open(config.cfTrainFilenameFormat.format(checker), "r") as f:
            data = f.readlines()
        random.shuffle(data)
        dataLen = len(data)
        print("Done, fetched {0} records".format(dataLen))
        if dataLen < 1:
            print("No data found")
            return

        # Json load
        print("Converting to objects...")
        self.X = []
        self.Y = []
        self.ObjInd = 0
        self.ObjMax = dataLen
        xMaxLen = 0
        yMaxLen = 0
        for record in data:
            obj = json.loads(record[:-1])
            self.X.append(obj['x'])
            self.Y.append(obj['y'])
            if len(obj['x']) > xMaxLen:
                xMaxLen = len(obj['x'])
            if len(obj['y']) > yMaxLen:
                yMaxLen = len(obj['y'])

        # Padding
        print("Counted input and output lengths (X = {0}, Y = {1})...".format(
            xMaxLen, yMaxLen))

        # Preparing model
        print("Preparing model...")
        batchSaveIndex = 0
        batchSaveCounter = 0
        batchSaveThreshold = 10000
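        # An intermediate checkpoint is saved every batchSaveThreshold samples,
        # which is what allows resuming mid-epoch via startK/startBatch below.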

        if startK == 0 and startBatch == 0:
            model = Sequential()
            model.add(
                LSTM(config.cfTrainHiddenSize,
                     input_shape=(xMaxLen, self.totalDictionaryLength)))
            model.add(RepeatVector(yMaxLen))
            for _ in range(config.cfTrainNumLayers):
                model.add(LSTM(config.cfTrainHiddenSize,
                               return_sequences=True))
            model.add(TimeDistributed(Dense(self.totalDictionaryLength)))
            model.add(Activation('softmax'))
            model.compile(loss='categorical_crossentropy',
                          optimizer='rmsprop',
                          metrics=['accuracy'])
        else:
            modelFormat = 'checkpoint_epoch_{0}.{1}.h5'.format(
                startK - 1, checker)
            if startBatch > 0:
                batchSaveIndex = int(startBatch / batchSaveThreshold)
                modelFormat = 'checkpoint_epoch_b{2}.{0}.{1}.h5'.format(
                    startK, checker, batchSaveIndex - 1)
            model = load_model(modelFormat)
        '''
        print("Converting data...")
        X_s = np.zeros((dataLen, xMaxLen, self.totalDictionaryLength))
        Y_s = np.zeros((dataLen, yMaxLen, self.totalDictionaryLength))
        for j in range(dataLen):
            valueX = X[j]
            noZerosToPad = xMaxLen - len(valueX)
            if noZerosToPad > 0:
                valueX = self.coder.applyPadding(valueX, noZerosToPad)
            valueY = Y[j]
            noZerosToPad = yMaxLen - len(valueY)
            if noZerosToPad > 0:
                valueY = self.coder.applyPadding(valueY, noZerosToPad)
            self.coder.convertToOneHot(valueX, X_s[j])
            self.coder.convertToOneHot(valueY, Y_s[j])
        '''
        # Training model
        '''
        print("Training...")
        for k in range(startK, config.cfTrainNoEpochs):
            #model.fit(X_s, Y_s, epochs=1)#, validation_split=0.2)
            model.fit(self.datagen(config.cfTrainBatchSize, xMaxLen, yMaxLen), epochs=1, steps_per_epoch=103)#, validation_split=0.2)
            model.save('checkpoint_epoch_{0}.{1}.h5'.format(k, checker))
        '''
        #"""
        print("Training model...")
        for k in range(startK, config.cfTrainNoEpochs):
            i = 0
            model.reset_metrics()
            if k == startK:
                i = startBatch
            while i < dataLen:
                end = i + config.cfTrainBatchSize
                if end > dataLen:
                    end = dataLen
                #'''
                X_s = np.zeros((end - i, xMaxLen, self.totalDictionaryLength))
                Y_s = np.zeros((end - i, yMaxLen, self.totalDictionaryLength))
                for j in range(i, end):
                    valueX = self.X[j]
                    noZerosToPad = xMaxLen - len(valueX)  # int((xMaxLen - len(valueX)) / 2)
                    if noZerosToPad > 0:
                        valueX = self.coder.applyPadding(valueX, noZerosToPad)
                    valueY = self.Y[j]
                    noZerosToPad = yMaxLen - len(valueY)  # int((yMaxLen - len(valueY)) / 2)
                    if noZerosToPad > 0:
                        valueY = self.coder.applyPadding(valueY, noZerosToPad)
                    zerosX = np.zeros((xMaxLen, self.totalDictionaryLength))
                    zerosY = np.zeros((yMaxLen, self.totalDictionaryLength))
                    X_s[j - i] = self.coder.convertToOneHot(valueX, zerosX)
                    Y_s[j - i] = self.coder.convertToOneHot(valueY, zerosY)
                result = model.train_on_batch(X_s, Y_s, reset_metrics=False)
                #'''
                #result = model.train_on_batch(X_s[i:end], Y_s[i:end])
                #del X_s
                #del Y_s
                print(
                    "[{2}] Done batch {0}-{1} (loss: {3:.3f}, accuracy: {4:.3f})"
                    .format(i, end, k, result[0], result[1]))
                i += config.cfTrainBatchSize
                batchSaveCounter += config.cfTrainBatchSize
                if batchSaveCounter >= batchSaveThreshold:
                    batchSaveCounter = 0
                    model.save('checkpoint_epoch_b{2}.{0}.{1}.h5'.format(
                        k, checker, batchSaveIndex))
                    batchSaveIndex += 1
            model.save('checkpoint_epoch_{0}.{1}.h5'.format(k, checker))
            batchSaveIndex = 0
            batchSaveCounter = 0
        #"""
        print("All done, exiting...")
Example #24
    def __init__(self, nvmnet, opdef, arg_regs, res_regs, op_reg):
        self.opdef = opdef
        self.op_name = opdef.op_name
        self.operations = dict(opdef.operations)
        self.in_ops = list(opdef.in_ops)
        self.out_ops = list(opdef.out_ops)
        self.tokens = list(opdef.tokens)

        self.arg_registers = arg_regs
        self.res_registers = res_regs
        self.op_register = op_reg

        self.hidden_name = "%s_gh" % self.op_name
        self.gate_name = "%s_go" % self.op_name

        # 1. OP->HID, 2. OP->OP, [3. RESX->RESX, 4. RESX->RESY for op in ops]
        self.gate_map = GateMap([(self.hidden_name, self.op_register, "u"),
                                 (self.op_register, self.op_register, "u")] +
                                [("res", "res", op)
                                 for op in self.operations] +
                                [("res", "arg", op) for op in self.operations])

        # Hidden gate layer
        N = 16
        self.hidden_size = N**2
        hid_activator = tanh_activator(nvmnet.pad, self.hidden_size)
        self.hidden_layer = Layer(self.hidden_name, (N, N), hid_activator,
                                  Coder(hid_activator))

        # Gate output layer
        self.gate_size = self.gate_map.get_gate_count()
        gate_activator = heaviside_activator(self.gate_size)
        self.gate_layer = Layer(self.gate_name, (self.gate_size, 1),
                                gate_activator, Coder(gate_activator))

        # Gate layer (detects operator)
        hidden_gate_layer = {
            "name": self.hidden_name,
            "neural model": "nvm",
            "rows": 1,
            "columns": self.hidden_size,
        }
        gate_layer = {
            "name": self.gate_name,
            "neural model": "nvm_heaviside",
            "rows": 1,
            "columns": self.gate_size,
        }
        self.structure = {
            "name": self.op_name,
            "type": "parallel",
            "layers": [hidden_gate_layer, gate_layer]
        }

        # Make gate connection
        def build_gate(to_name, index, suffix=""):
            return {
                "name": get_conn_name(to_name, self.gate_name, suffix),
                "from layer": self.gate_name,
                "to layer": to_name,
                "type": "subset",
                "opcode": "add",
                "subset config": {
                    "from row end": 1,
                    "from column start": index,
                    "from column end": index + 1,
                    "to row end": 1,
                    "to column end": 1,
                },
                "plastic": False,
                "gate": True,
            }

        # Squash weights to cancel gain
        def build_squash(to_name, suffix="", gated=True):
            return {
                "name": get_conn_name(to_name, to_name, suffix),
                "from layer": "bias",
                "to layer": to_name,
                "type": "fully connected",
                "opcode": "add",
                "plastic": False,
                "gated": gated,
            }

        # Make weight/bias connections
        def build_conns(to_name, from_name, suffix="", gated=True):
            return [{
                "name": get_conn_name(to_name, from_name, suffix + "-w"),
                "from layer": from_name,
                "to layer": to_name,
                "type": "fully connected",
                "opcode": "add",
                "plastic": False,
                "gated": gated
            }, {
                "name": get_conn_name(to_name, from_name, suffix + "-b"),
                "from layer": 'bias',
                "to layer": to_name,
                "type": "fully connected",
                "opcode": "add",
                "plastic": False,
                "gated": gated
            }]

        self.connections = []

        # Hidden gate input
        self.connections.append(
            build_gate(
                self.hidden_name,
                self.gate_map.get_gate_index(
                    (self.hidden_name, self.op_register, "u"))))
        self.connections += build_conns(self.hidden_name,
                                        self.op_register,
                                        gated=True)

        # Hidden gate recurrence
        self.connections += build_conns(self.hidden_name,
                                        self.hidden_name,
                                        gated=False)

        # Gate activation
        self.connections += build_conns(self.gate_name,
                                        self.hidden_name,
                                        gated=False)

        # Operation squash
        self.connections.append(
            build_gate(
                self.op_register,
                self.gate_map.get_gate_index(
                    (self.op_register, self.op_register, "u")), self.op_name))
        self.connections.append(
            build_squash(self.op_register, suffix=self.op_name + "-squash"))

        for op in self.operations:
            for to_name in self.res_registers:
                # Recurrent connections
                self.connections.append(
                    build_gate(
                        to_name,
                        self.gate_map.get_gate_index(("res", "res", op)),
                        op + "-1"))
                self.connections += build_conns(to_name,
                                                to_name,
                                                suffix=op,
                                                gated=True)

                # Inter-layer connections
                self.connections.append(
                    build_gate(
                        to_name,
                        self.gate_map.get_gate_index(("res", "arg", op)),
                        op + "-2"))
                for from_name in self.arg_registers:
                    if to_name != from_name:
                        self.connections += build_conns(to_name,
                                                        from_name,
                                                        suffix=op,
                                                        gated=True)

        self.layer_map = {
            name: nvmnet.layers[name]
            for name in self.arg_registers + self.res_registers +
            [self.op_register]
        }
        self.layer_map[self.gate_name] = self.gate_layer
        self.layer_map[self.hidden_name] = self.hidden_layer

        self.conn_names = tuple(conn["name"] for conn in self.connections)
Example #25
# 3., 4., 5.
model1 = load_model(config.cfModelFilenameFormat.format('deadcode.DeadStores'))
model2 = load_model(
    config.cfModelFilenameFormat.format(
        'clang-diagnostic-tautological-constant-out-of-range-compare'))
model3 = load_model(
    config.cfModelFilenameFormat.format('clang-diagnostic-unused-parameter'))
model4 = load_model(
    config.cfModelFilenameFormat.format(
        'clang-diagnostic-constant-conversion'))
dictionary1 = Dictionary('deadcode.DeadStores')
dictionary2 = Dictionary(
    'clang-diagnostic-tautological-constant-out-of-range-compare')
dictionary3 = Dictionary('clang-diagnostic-unused-parameter')
dictionary4 = Dictionary('clang-diagnostic-constant-conversion')
coder1 = Coder(dictionary1)
coder2 = Coder(dictionary2)
coder3 = Coder(dictionary3)
coder4 = Coder(dictionary4)
totalDictionaryLength1 = dictionary1.length()
totalDictionaryLength2 = dictionary2.length()
totalDictionaryLength3 = dictionary3.length()
totalDictionaryLength4 = dictionary4.length()


def ProcessBugsInFile(fileName):
    # 5.1.
    # 5.2.
    for bug in bugsPerFile[fileName]:
        bugData = bugDataList[bug]
        cleanFn = fileName[:-4]
Example #26
    def __init__(self, N, mask_frac, conv=1., stabil=10):
        N = int(N / conv)

        self.stabil = stabil
        self.mask_frac = mask_frac

        self.size = N**2
        self.mask_size = int(self.size / self.mask_frac)
        pad = 0.0001

        # Build mem/ptr/ctx unit
        self.prefix = "test"
        layer_configs, connections = build_unit(self.prefix, N, "graph_net",
                                                conv, pad)

        # Assign gate indices
        self.gate_layer_name = "g"
        self.gates = {}
        for conn in connections:
            if any(
                    conn.get(key, False)
                    for key in ["gate", "decay", "learning"]):
                conn["from layer"] = self.gate_layer_name
                conn["subset config"]["from column start"] = len(self.gates)
                conn["subset config"]["from column end"] = len(self.gates) + 1
                self.gates[conn["name"]] = len(self.gates)

        # Build gate layer
        layer_configs.append(
            build_layer(self.gate_layer_name, "nvm_heaviside", 1,
                        len(self.gates), pad))

        structure = {
            "name": "graph_net",
            "type": "parallel",
            "layers": layer_configs
        }

        self.net = Network({
            "structures": [structure],
            "connections": connections
        })

        ### Create activators and coders
        self.act = tanh_activator(pad, self.size)
        self.act_h = heaviside_activator(self.size)

        self.layer_names = [
            self.prefix + "m", self.prefix + "p", self.prefix + "c",
            self.gate_layer_name
        ]

        self.acts = {
            self.prefix + "m": self.act,
            self.prefix + "p": self.act,
            self.prefix + "c": self.act_h,
            self.gate_layer_name: self.act_h,
        }
        self.coders = {
            self.prefix + "m": Coder(self.act),
            self.prefix + "p": Coder(self.act),
            self.prefix + "c": Coder(self.act_h),
            self.gate_layer_name: Coder(self.act_h),
        }
Example #27
def runCoder(cfgFile):
    coder = Coder(coderNormal)
    coder.run(cfgFile)
Example #28

if __name__ == '__main__':

    np.set_printoptions(linewidth=200,
                        formatter={'float': lambda x: '% .2f' % x})

    N = 8
    PAD = 0.05

    from activator import *
    # act_fun = tanh_activator
    act_fun = logistic_activator

    act = act_fun(PAD, N)
    coder = Coder(act)

    layer_names = ['mem', 'ip', 'opc', 'op1', 'op2', 'op3']
    layers = [Layer(name, N, act, coder) for name in layer_names]

    NL = len(layers) + 2  # +2 for gate out/hidden
    NG = NL**2 + NL
    NH = 100
    actg = heaviside_activator(NG)
    acth = act_fun(PAD, NH)
    gate_output = Layer('gates', NG, actg, Coder(actg))
    gate_hidden = Layer('ghide', NH, acth, Coder(acth))
    layers.extend([gate_hidden, gate_output])

    gate_map = gm.make_nvm_gate_map([layer.name for layer in layers])
    gs = GateSequencer(gate_map, gate_output, gate_hidden,
Example #29
from coder import Coder, MorseCoder
import string

if __name__ == "__main__":
    # By default, the translator encodes text by switching it to uppercase
    translator = Coder(string.ascii_lowercase, string.ascii_uppercase)

    while True:
        line = raw_input()
        coded = translator.encode(line)
        print coded
        print translator.decode(coded)
Example #30
def runCoderHeuristic(cfgFile):
    heuristicCoder = Coder(coderHeuristic)
    heuristicCoder.run(cfgFile)
Example #31
def test(N, pad, mask_frac, mappings, stabil=5):
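    # Compares two ways of storing FSM transitions: the old method learns a
    # mapping from concatenated (input, state) patterns, while the new method
    # gates a single weight matrix with a learned per-state random mask.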
    fsm_states = mappings.keys() + list(
        set(v for m in mappings.values() for k, v in m))
    input_states = list(set(k for m in mappings.values() for k, v in m))

    shape = (N, N)
    size = N**2

    act = tanh_activator(pad, size)
    act_log = logistic_activator(pad, size)
    input_layer, fsm_layer = (Layer(k, shape, act, Coder(act)) for k in "ab")
    input_layer.encode_tokens(input_states, orthogonal=True)
    fsm_layer.encode_tokens(fsm_states, orthogonal=True)

    ########### OLD METHOD ###################

    # Learn recurrent weights
    w_r = np.zeros((size, size))
    b = np.zeros((size, 1))
    X = fsm_layer.encode_tokens(fsm_states)
    dw, db = rehebbian(w_r, b, X, X, act, act)
    w_r = w_r + dw

    # Learn inter-regional weights
    w = np.zeros((size, size * 2))
    b = np.zeros((size, 1))
    for s, m in mappings.items():
        X = input_layer.encode_tokens([k for k, v in m])
        s = np.repeat(fsm_layer.coder.encode(s), X.shape[1], axis=1)
        X = np.concatenate((X, s), axis=0)

        Y = fsm_layer.encode_tokens([v for k, v in m])
        dw, db = rehebbian(w, b, X, Y, act, act)
        w = w + dw

    # Test
    correct = 0
    weighted = 0.
    total = 0
    for start, m in mappings.items():
        start = fsm_layer.coder.encode(start)

        for inp, end in m:
            x = np.concatenate((input_layer.coder.encode(inp), start), axis=0)
            y = act.f(w.dot(x))

            # Stabilize
            for _ in range(stabil):
                old_y = y
                y = act.f(w_r.dot(y))
                if np.array_equal(y, old_y):
                    break
            out = fsm_layer.coder.decode(y)

            if out == end:
                correct += 1
                weighted += 1.0
            else:
                weighted += float(
                    len(
                        np.where(
                            np.sign(y) == np.sign(fsm_layer.coder.encode(
                                end))))) / size
            total += 1
    old_acc = float(correct) / total
    weighted_old_acc = weighted / total

    ########### NEW METHOD ###################

    input_layer, fsm_layer = (Layer(k, shape, act, Coder(act)) for k in "ab")
    input_layer.encode_tokens(input_states, orthogonal=False)
    fsm_layer.encode_tokens(fsm_states, orthogonal=False)

    # Create gating masks for each state
    w_masks = {
        s: (np.random.random((size, size)) < (1. / mask_frac)).astype(np.float)
        for s in fsm_states
    }

    # Ensure nonzero masks
    for mask in w_masks.values():
        if np.sum(mask) == 0:
            mask[randint(0, mask.shape[0] - 1),
                 randint(0, mask.shape[1] - 1)] = 1.

    # Test learning of masks
    w_m = np.zeros((size**2, size))
    b = np.zeros((size**2, 1))
    X = fsm_layer.encode_tokens(fsm_states)
    Y = np.concatenate(tuple(w_masks[s].reshape(-1, 1) for s in fsm_states),
                       axis=1)
    dw, db = rehebbian(w_m, b, X, Y, act, act)
    w_m = w_m + dw
    '''
    for s in fsm_states:
        x = fsm_layer.coder.encode(s)
        y = act_log.f(w_m.dot(x))
        print(np.sum((y.reshape(size,size) > 0.5) != (w_masks[s] > 0.5)))
    '''

    # Learn recurrent weights
    w_r = np.zeros((size, size))
    b = np.zeros((size, 1))
    X = fsm_layer.encode_tokens(fsm_states)
    dw, db = rehebbian(w_r, b, X, X, act, act)
    w_r = w_r + dw

    # Learn inter-regional weights
    w = np.zeros((size, size))
    b = np.zeros((size, 1))
    for start, m in mappings.items():
        # Start state mask, input_layer input
        X = input_layer.encode_tokens([k for k, v in m])
        Y = fsm_layer.encode_tokens([v for k, v in m])

        w_mask = w_masks[start]
        dw, db = rehebbian(np.multiply(w, w_mask), b, X, Y, act, act)
        w = w + (np.multiply(dw, w_mask) * mask_frac)

    # Test
    total = 0
    weighted = 0.
    masked_weighted = 0.
    correct = 0
    masked_correct = 0
    for start, m in mappings.items():
        #w_masked = np.multiply(w_masks[start], w)

        x = fsm_layer.coder.encode(start)
        w_masked = np.multiply(w, act_log.f(w_m.dot(x)).reshape(size, size))
        #w_masked = np.multiply(w, (w_m.dot(x) > 0).astype(np.int).reshape(size,size))

        for inp, end in m:
            x = input_layer.coder.encode(inp)
            y = act.f(w_masked.dot(x))

            # Stabilize
            for _ in range(stabil + 1):
                old_y = y
                y = act.f(w_r.dot(y))
                if np.array_equal(y, old_y):
                    break
            out = fsm_layer.coder.decode(y)

            # Check output
            if out == end:
                correct += 1
                weighted += 1.0
            else:
                weighted += float(
                    len(
                        np.where(
                            np.sign(y) == np.sign(fsm_layer.coder.encode(
                                end))))) / size
            total += 1
    new_acc = float(correct) / total
    weighted_new_acc = weighted / total

    return {
        "old_acc": old_acc,
        "new_acc": new_acc,
        "weighted_old_acc": weighted_old_acc,
        "weighted_new_acc": weighted_new_acc
    }
Example #32
"""
Python program to realize simple steganography,
implementing both the coding and decoding parts.

:Author: Manthan C S
:GitHub: mnthnx64
"""

from coder import Coder
from decoder import Decoder

if __name__ == '__main__':
    cdr = Coder("In all the examples so far, the elements of a are provided by the iterator one at a time, because all the looping logic is internal to the iterator. While this is simple and convenient, it is not very efficient. A better approach is to move the one-dimensional innermost loop into your code, external to the iterator. This way, NumPy’s vectorized operations can be used on larger chunks of the elements being visited.")
    cdr.encode()
    dcdr = Decoder()
    text = dcdr.decode()
    print(text)
Example #33
def generate_train_data(args):
    """
    Generates training data for the region provided through the arguments.

    Parameters
    ----------
    reads_path : path to the aligned reads file
    truth_genome_path : path to the truth genome
    ref : reference sequence
    region : region for which data is required

    Returns
    -------
    region_name : region name
    positions : positions corresponding to the provided region
    examples : examples corresponding to the provided region
    labels : labels corresponding to the provided region
    """

    reads_path, truth_genome_path, ref, region = args

    aligns = get_aligns(truth_genome_path, region)
    filtered_aligns = filter_aligns(aligns)

    print(f'>> finished generating labels for {region.name}:{region.start}-{region.end}')

    if not filtered_aligns: 
        print(f'>> no alignments')
        return None

    positions = []
    examples = []
    labels = []

    for align in filtered_aligns:
        position_label_dict = dict()
        positions_with_unknown_base = set()

        pos, lbls = get_postions_and_labels(align, ref, region)
        for position, label in zip(pos, lbls):
            if label == Coder.encode(Coder.UNKNOWN):
                positions_with_unknown_base.add(position)
            else:
                position_label_dict[position] = label

        sorted_positions = sorted(list(position_label_dict.keys()))
        region_string = f'{region.name}:{sorted_positions[0][0] + 1}-{sorted_positions[-1][0]}'
        result = gen.generate_features(reads_path, str(ref), region_string)

        for P, X in zip(*result):
            Y = []
            to_yield = True

            for p in P:
                assert is_in_region(p[0], filtered_aligns)

                if p in positions_with_unknown_base:
                    to_yield = False
                    break

                try:
                    y_label = position_label_dict[p]
                except KeyError:
                    if p[1] != 0:
                        y_label = Coder.encode(Coder.GAP)
                    else:
                        raise KeyError(f'error: No label mapping for position {p}!')

                Y.append(y_label)

            if to_yield:
                positions.append(P)
                examples.append(X)
                labels.append(Y)

    print(f'>> finished generating examples for {region.name}:{region.start}-{region.end}')
    return region.name, positions, examples, labels
Example #34
def main():
    start = time.time()

    # a = []
    # b = []

    # for i in range(0, 30):

    generated_array = Generator().populate_array(2**18)
    # print(generated_array)

    coded_array = Coder().triple_code(generated_array)
    # print(coded_array)

    # a.append(i / 100)
    # distorted_array = Channel(i/100).distort(coded_array)

    distorted_array = Channel(0.15).distort(coded_array)
    # print(distorted_array)

    decoded_array = Decoder().decode(distorted_array)
    # print(decoded_array)

    print(error_factor(generated_array, decoded_array))
    # b.append(error_factor(generated_array, decoded_array))

    # print(a, b)
    # plt.plot(a, b)
    # plt.title("Error percentage depending on the probability p")
    # plt.xlabel("Probability of error p")
    # plt.ylabel("Error factor in %")
    # plt.show()
    end = time.time()
    print("Time:", end-start)

    """
    Stage II
    """

    print("Stage 2")
    # 2048

    bch = BCH()

    a = []
    b = []
    error = 0

    sent_msg = b_util.urandom(512)

    m = 8
    t = 63
    k = 9
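    # BCH parameters (assuming the usual convention): codewords over GF(2^m)
    # with length n = 2^m - 1 = 255, k = 9 data bits, and up to t = 63
    # correctable bit errors.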

    # 7_21_29

    # received_msg = BCH.code(sent_msg, m, t, k, 0.3)

    # for i in range(0, 20, 1):
    #     for j in range(10):
    #         received_msg = BCH.code(sent_msg, 3, 1, 4, i/100)

    #         filled_array = fill_with_zeros(sent_msg, len(received_msg))

    #         # print(len(example_bit_array), len(received_msg))
    #         # # print(received_msg)
    #         # print("Error [%] - decoded msg: ", error_factor(example_bit_array, received_msg))
    #         error += error_factor(filled_array, received_msg)
    #     error /= 10
    #     a.append(i/100)
    #     b.append(error)

    # print(a, b)
    # plt.plot(a, b)

    # plt.title(
    #     f"Error percentage depending on the probability p \nfor m = {m} k = {k} t = {t}")
    # plt.xlabel("Probability of error p")
    # plt.ylabel("Error factor in %")
    # plt.show()

    for i in range(0, 20, 3):
        counter = 0
        for m in bch_code_parameters:
            for t in bch_code_parameters[m]:
                counter += 1

                received_msg = BCH.code(sent_msg, m, t, bch_code_parameters[m][t], i / 100)

                filled_array = fill_with_zeros(sent_msg, len(received_msg))
                # print(len(example_bit_array), len(received_msg))
                # # print(received_msg)
                # print("Error [%] - decoded msg: ", error_factor(example_bit_array, received_msg))
                error += error_factor(filled_array, received_msg)
        a.append(i/100)
        b.append(error/counter)
        error = 0
        print(counter)
    print(a, b)
    plt.plot(a, b)
    plt.title("Error percentage depending on the probability p")
    plt.xlabel("Probability of error p")
    plt.ylabel("Error factor in %")
    plt.show()