Beispiel #1
0
 def opening_handshake(self, syn_number):
     """Perform the initiating side of the opening handshake.

     A SYN packet carrying ``syn_number`` is queued on ``send_buf``; the
     method then polls ``recv_buf``.  The first packet received is treated
     as the SYN-ACK (its flags are not inspected here): its stream id and
     window size are adopted, an ACK is queued and ``self.connected`` is set.
     While nothing has arrived, the SYN is re-queued at most once per
     elapsed ``self.timeout``.

     :param syn_number: sequence number placed in the SYN header; also
         stored as ``self.original_syn``.
     :returns: ``True`` once the handshake has completed.
     """
     syn = Packet(data=b"",
                  header=header.Header(flags=header.SYN,
                                       windows=self.window_size,
                                       syn_number=syn_number))
     syn.header.genchecksum(syn.data)
     self.send_buf.put(syn, timeout=self.timeout)
     self.original_syn = syn_number
     sent = time.time()
     print("sending syn and waiting for recv")
     while not self.connected:
         try:
             syn_ack = self.recv_buf.get(timeout=self.timeout)
         except Empty:
             # Retransmit only while still unconnected, and restart the
             # timer so the SYN is not re-queued on every later iteration.
             if time.time() - sent >= self.timeout:
                 self.send_buf.put(syn, timeout=self.timeout)
                 sent = time.time()
         else:
             self.cur_stream_id = syn_ack.header.stream_id
             print(syn_ack.header.flags)
             print(syn_ack.header.SYN_number)
             print(syn_ack.header.ACK_number)
             ack = Packet(data=b"",
                          header=header.Header(
                              stream_id=syn_ack.header.stream_id,
                              ack_number=syn_ack.header.ACK_number,
                              flags=header.ACK,
                              windows=syn_ack.header.windows))
             ack.header.genchecksum(ack.data)
             self.send_buf.put(ack)
             # Adopt the window size announced by the peer.
             self.window_size = syn_ack.header.windows
             self.connected = True
         print(self.original_syn)
         print(self.connected)
     print("done opening handshake")
     return True
Beispiel #2
0
 def testReprSanity(self):
     """Header __repr__, __eq__, __ne__ should give sane results"""
     for entry in self.knownValues:
         parsed = header.Header(entry[0])
         # repr() must round-trip to an equal object ...
         rebuilt = eval(repr(parsed))
         self.assertEqual(parsed, rebuilt)
         # ... and a parsed header must differ from a default-constructed one.
         self.assertNotEqual(parsed, header.Header())
Beispiel #3
0
 def closing_handshake(self):
     """Perform the initiating side of the closing handshake.

     A FIN packet is queued on ``send_buf`` and the method polls
     ``recv_buf``.  The first packet received is treated as the FIN-ACK
     (its flags are not inspected here): an ACK for it is queued and
     ``self.connected`` is cleared.  While nothing has arrived, the FIN is
     re-queued at most once per elapsed ``self.timeout``.
     """
     # NOTE(review): ``FIN`` is referenced as a bare name while the other
     # flags are read from the ``header`` module -- confirm it is imported.
     fin = Packet(data=b"",
                  header=header.Header(stream_id=self.cur_stream_id,
                                       syn_number=(self.original_syn +
                                                   self.num_of_packets + 1),
                                       flags=FIN,
                                       windows=self.window_size))
     fin.header.genchecksum(fin.data)
     self.send_buf.put(fin)
     sent = time.time()
     print("sending fin")
     while self.connected:
         try:
             fin_ack = self.recv_buf.get(timeout=self.timeout)
         except Empty:
             # Retransmit only while the connection is still open, and
             # restart the timer so the FIN is not re-queued every pass.
             if time.time() - sent >= self.timeout:
                 self.send_buf.put(fin, timeout=self.timeout)
                 sent = time.time()
         else:
             self.cur_stream_id = fin_ack.header.stream_id
             print(fin_ack.header.flags)
             print(fin_ack.header.SYN_number)
             print(fin_ack.header.ACK_number)
             ack = Packet(data=b"",
                          header=header.Header(
                              stream_id=fin_ack.header.stream_id,
                              ack_number=fin_ack.header.SYN_number,
                              flags=header.ACK,
                              windows=fin_ack.header.windows))
             ack.header.genchecksum(ack.data)
             self.send_buf.put(ack)
             self.connected = False
     print("closing handshake done")
     print("connection: ")
     print(self.connected)
Beispiel #4
0
    def get_header(self):
        """Returns the liblas.header.Header for the file

        Returns ``None`` when the file has no open handle.  In mode 0 the
        header is fetched through the reader API, in every other mode
        through the writer API.
        """
        if not self.handle:
            return None

        if self.mode == 0:
            raw_header = core.las.LASReader_GetHeader(self.handle)
        else:
            raw_header = core.las.LASWriter_GetHeader(self.handle)
        return lasheader.Header(handle=raw_header, owned=True)
Beispiel #5
0
def Tohtml(url):
    """Fetch ``url`` and render it through the ``module.html`` template.

    The request is retried after a 10 second pause on failure; after the
    11th failed attempt the string ``"error network"`` is returned instead.

    On success the page title, post time and post source are extracted and
    substituted into the template read from ``module.html`` (relative to the
    current working directory), and the first ``div.divAbs`` element is
    appended to the result.

    :param url: address of the page to fetch; also used as ``link`` in the
        template.
    :returns: the rendered HTML string, or ``"error network"``.
    :raises IndexError: if the page lacks the expected elements.
    """
    error_count = 0
    while True:
        try:
            r = get(url, headers=header.Header())
            break
        except Exception:
            # ``Exception`` rather than a bare ``except`` so that
            # KeyboardInterrupt / SystemExit still stop the retry loop.
            error_count += 1
            if error_count > 10:
                return "error network"
            sleep(10)

    html = BeautifulSoup(r.text, "html.parser")
    title = html.select('div[class="topTlt"] p[class="title"]')[0].get_text()
    postInfo = html.select('div[class="topTlt"] span')
    contents = html.select('div[class="divAbs"]')
    postTime, postFrom = postInfo[1].get_text(), postInfo[3].get_text()

    # Close the template file deterministically instead of leaking the handle.
    with open("module.html", "r") as template_file:
        text = template_file.read()
    text = text.format(title=title,
                       postTime=postTime,
                       postFrom=postFrom,
                       link=url)

    return text + str(contents[0])
Beispiel #6
0
	def __init__(self):
		"""Build the installer's main window.

		Creates the shared ``context`` dictionary, stacks the header, the
		page stack, a separator and the button row vertically, sizes the
		window to half the screen and centers it, and connects the cancel
		button to ``confirm_cancellation``.
		"""
		Gtk.Window.__init__(self, title='AryaLinux Installer')

		# State shared with the child widgets created below.
		self.context = dict()
		self.context['main_window'] = self
		self.context['installation_cmd'] = [ '/usr/bin/sudo', '/bin/bash', '/opt/installer-new/backend.sh' ]
		self.context['install_started'] = False

		self.header = header.Header()
		self.vbox = Gtk.VBox(spacing=5)
		self.stack = stack.Stack(self.context, buttons.prevButton, buttons.nextButton, buttons.installButton)
		self.buttons = buttons.Buttons(self.context)

		self.vbox.pack_start(self.header, False, True, 0)
		self.vbox.pack_start(self.stack, True, True, 0)
		self.vbox.pack_start(Gtk.HSeparator(), False, True, 0)
		self.vbox.pack_start(self.buttons, False, True, 0)
		self.add(self.vbox)

		# Set half the size of screen and center.
		# Floor division keeps the values integral on Python 3 as well;
		# set_size_request() does not accept floats.
		screen = Gdk.Screen.get_default()
		self.set_size_request(screen.get_width() // 2, screen.get_height() // 2)
		self.set_position(Gtk.WindowPosition.CENTER_ALWAYS)
		self.set_border_width(5)

		buttons.cancelButton.connect('clicked', self.confirm_cancellation)

		# Re-assert the initial state after the child widgets were handed
		# the context (they may have touched it while being constructed).
		self.context['install_started'] = False
		self.context['input_started'] = True
Beispiel #7
0
    def headers(self):
        """
        Return a list of all headers for the site.

        The teasers are read from the JSON assigned inside the first
        ``<script>`` tag whose content starts with ``SCRIPT_TAG_START``.
        Raises ``Exception("No tags found")`` when no such tag exists.
        """
        soup = BeautifulSoup(self.source(), "html.parser")

        # Locate the script tag holding the JSON assignment; empty tags are
        # skipped.  The ``else`` branch runs only when no tag matched.
        for tag in soup.find_all("script"):
            if tag.contents and tag.contents[0].startswith(
                    self.SCRIPT_TAG_START):
                payload = tag.contents[0][len(self.SCRIPT_TAG_START):]
                break
        else:
            raise Exception("No tags found")

        collections = json.loads(payload)["collections"]

        # Always one key - randomized string.
        only_key = list(collections.keys())[0]
        entries = collections[only_key]["contents"]["items"]

        found = []

        for entry_id, entry in entries.items():
            # No more articles if we no longer find the "abse" key.
            if "abse" not in entry_id:
                break

            for teaser in entry["items"]:
                # We're looking for teasers to fetch.
                if teaser["type"] != "box":
                    continue

                # Only teasers carrying a tracked object name are usable.
                has_name = (
                    "clickTracking" in teaser
                    and "object" in teaser["clickTracking"]
                    and "name" in teaser["clickTracking"]["object"]
                )
                if not has_name:
                    continue

                tracking = teaser["clickTracking"]
                found.append(header.Header(
                    tracking["object"]["name"],
                    self._find_text(teaser),
                    tracking["target"]["url"],
                ))

        return found
Beispiel #8
0
def decrypt_file(input_file, output_file, password=None, privkey_file=None):
    """Decrypt ``input_file`` into ``output_file``.

    The header at the start of the input names the hash function, the
    algorithm and the payload length.  A symmetric algorithm needs
    ``password``; any other algorithm needs ``privkey_file``.

    Raises ``ValueError`` for an unknown hash function, a missing or wrong
    password, or a missing or wrong private key.
    """

    hdr = header.Header()
    with open(input_file, 'rb') as ifstream:
        hdr.read(ifstream)

        # Only validates the hash function id; the object itself is unused.
        try:
            hashpw.HashFunc(hdr.hash_function)
        except ValueError:
            raise ValueError('Unknown hash function in header')

        algorithm = Algorithm(hdr.algorithm)
        data_length = hdr.data_length
        symm_cipher = algorithm.symmetric_cipher()
        symmetric = algorithm.is_symmetric()

        if symmetric:
            if password is None:
                raise ValueError(
                    'Algorithm is symmetric, but password is None')
            symmkey = hdr.key(password=password.encode())
            if symmkey is None:
                raise ValueError('Invalid password')
        else:
            if privkey_file is None:
                raise ValueError(
                    'Algorithm is hybrid, but private key file is None')
            symmkey = hdr.key(privkey_file=privkey_file)
            if symmkey is None:
                raise ValueError('Invalid private key')

        # The output file is created only once a usable key is available.
        with open(output_file, 'wb') as ofstream:
            symm_cipher.decrypt(ifstream, ofstream, symmkey, data_length)
Beispiel #9
0
    def testHeaderInitKnownValuesContents(self):
        """Header parsing comparing known result with known input for contents

        """
        # Only the raw line and the expected contents matter for this test.
        for wikiline, expected, _level, _kind in self.knownValues:
            parsed = header.Header(wikiline)
            self.assertEqual(expected, parsed.contents)
Beispiel #10
0
 def read(self, index):
     """Reads the point at the given index

     Returns ``None`` unless the file is in mode 0.
     """
     if self.mode != 0:
         return None
     raw_point = core.las.LASReader_GetPointAt(self.handle, index)
     result = point.Point(handle=raw_point, copy=True)
     result.set_header(lasheader.Header(handle=self._header, copy=False))
     return result
Beispiel #11
0
 def test_search(self):
     """test search"""
     site_header = header.Header(self.driver)
     # the search form has to open first
     opened = site_header.click_search()
     assert opened, "search form is unavailable"
     # search for a keyword that is expected to give results
     site_header.run_search('renault')
     # NOTE(review): the comparison with 0 reads oddly next to the failure
     # message -- confirm what search_results_general() returns.
     outcome = site_header.search_results_general()
     assert (outcome == 0) , 'nothing found, expected several news in results'
Beispiel #12
0
def simple_data():
    """Return the data entries of the first folder of ``simple.cab``.

    The second element of the returned pair is the slice of the archive
    starting at offset 0x66.
    """
    archive = read_cabextract_cab('simple.cab')
    tail = archive[0x66:]

    cab_header = header.Header(archive)
    first_folder = list(folder.create_folders(cab_header, archive))[0]
    entries = list(data.create_datas(cab_header, first_folder, archive))

    return entries, tail
Beispiel #13
0
    def __init__(self, callingWindow, x, y, width, height, id, movable=True):
        """Wrap ``callingWindow`` in a new ``Window`` at ``(x, y)``.

        When ``movable`` is true a header is added to the new window and
        ``callingWindow.yOffset`` is increased by the header's height.
        ``width`` and ``height`` are accepted but not used here.
        """
        # Forward the caller's choice; it used to be hard-coded to True, so
        # movable=False still produced a movable window.
        window = Window(x, y, id, movable=movable)

        if movable:
            #uses given id and width
            head = header.Header(window)
            window.Add('header', head)
            callingWindow.yOffset += head.height

        window.Add(id, callingWindow)
Beispiel #14
0
 def open(self):
     """Open the file according to ``self._mode``.

     ``'r'`` / ``'rb'`` create a reader (mode 0), ``'w'`` creates a writer
     (mode 1), and a mode containing ``'+'`` but no ``'r'`` creates an
     appending writer (mode 2).  The filename is recorded in the matching
     list of ``files``.
     """
     requested = self._mode
     if requested in ('r', 'rb'):
         self.handle = core.las.LASReader_Create(self.filename)
         self.mode = 0
         reader_header = core.las.LASReader_GetHeader(self.handle)
         self._header = lasheader.Header(handle = reader_header)
         files['read'].append(self.filename)
     elif requested == 'w':
         if not self._header:
             # No header supplied: start from a freshly created one.
             self._header = lasheader.Header(handle = core.las.LASHeader_Create())
         self.handle = core.las.LASWriter_Create(
             self.filename, self._header.handle, 1)
         self.mode = 1
         files['write'].append(self.filename)
     elif '+' in requested and 'r' not in requested:
         if not self._header:
             # Borrow the header from the existing file via a short-lived
             # reader.
             probe = core.las.LASReader_Create(self.filename)
             self._header = lasheader.Header(
                 handle = core.las.LASReader_GetHeader(probe))
             core.las.LASReader_Destroy(probe)
         self.handle = core.las.LASWriter_Create(
             self.filename, self._header.handle, 2)
         self.mode = 2
         files['append'].append(self.filename)
Beispiel #15
0
 def test_login_failed(self):
     """login attempt with incorrect data"""
     site_header = header.Header(self.driver)
     # the login form has to open first
     form_state = site_header.login_click_not_loggedin()
     assert not form_state, "login frame is not displayed"
     # submit credentials that are not valid
     site_header.enter_username("test_user")
     site_header.enter_password("test_password")
     # an alert about the wrong login/password is expected
     alert_shown = site_header.click_login_btn()
     assert alert_shown, "alert about incorrect logon/password is absent"
     # the user must still be logged out
     still_logged_out = site_header.login_click_loggedin()
     assert still_logged_out, "user with incorrect credentials are logged in"
Beispiel #16
0
    def __init__(self, previousHash, transactionsList, nonce, merkle):
        """Assemble a block from its transactions and header fields.

        The header is built from ``previousHash``, the hashes of the given
        transactions, ``nonce`` and ``merkle``; the block hash is computed
        afterwards.
        """
        self.magicNumber = 0xD9B4BEF9
        self.transactionCounter = len(transactionsList)

        transaction_hashes = self.transactionListHashesOnly(transactionsList)
        self.blockHeader = header.Header(
            previousHash, transaction_hashes, nonce, merkle)

        self.transactions = transactionsList
        self.blockHash = self.calculateHash()

        # Temp size - renewSize method will update size before adding to blockchain
        pickled = cPickle.dumps(self)
        self.blockSize = sys.getsizeof(pickled)
Beispiel #17
0
def encrypt_file(input_file,
                 output_file,
                 algorithm,
                 passwords=None,
                 pubkey_files=None):
    """Encrypt ``input_file`` into ``output_file``.

    A fresh symmetric key is generated and stored in the file header once
    per password (symmetric algorithm) or once per public key file (any
    other algorithm); the header is written ahead of the encrypted data.

    :param input_file: file with plain text
    :type input_file: str

    :param output_file: file for encrypted text
    :type output_file: str

    :param algorithm: algorithm used to encrypt
    :type algorithm: str

    :param passwords: list of passwords to decrypt the file
    :type passwords: list

    :param pubkey_files: list of files with public keys to encrypt the symmetric key
    :type pubkey_files: list

    :raises ValueError: if the credentials required by the algorithm are
        missing
    """

    algorithm = Algorithm(algorithm)
    symmetric = algorithm.is_symmetric()

    if symmetric:
        if passwords is None:
            raise ValueError('Algorithm is symmetric, but passwords are None')
    elif pubkey_files is None:
        raise ValueError('Algorithm is hybrid, but public key files are None')

    hdr = header.Header()
    hdr.algorithm = algorithm.number
    hdr.hash_function = hashpw.ENC_HASH_FUNCTION.number
    hdr.data_length = os.path.getsize(input_file)

    symm_cipher = algorithm.symmetric_cipher()
    symmkey = symm_cipher.keygen()

    # Register every recipient with the same symmetric key.
    if symmetric:
        for secret in passwords:
            hdr.add_user(symmkey, password=secret.encode())
    else:
        for key_path in pubkey_files:
            hdr.add_user(symmkey, pubkey_file=key_path)

    with open(input_file, 'rb') as ifstream, \
            open(output_file, 'wb') as ofstream:
        hdr.write(ofstream)
        symm_cipher.encrypt(ifstream, ofstream, symmkey)
    def __init__(self, parent, *args, **kwargs):
        """Create the predictions table, betting table and control panels.

        ``parent`` and any extra arguments are forwarded to ``tk.Frame``.
        The three sub-frames are placed with ``grid`` and row 0 / column 0
        of ``self.master`` are made stretchable.
        """
        tk.Frame.__init__(self, parent, *args, **kwargs)
        # NOTE(review): the sub-frames below are created without an explicit
        # parent, so they are not children of this frame -- confirm intended.
        self.predictions_table_frame = tk.Frame(borderwidth=5)
        self.predictions_table = pred_table.PredTable(self.predictions_table_frame)

        # Parented to this frame; not placed by a geometry manager here.
        self.table_header = header.Header(self)

        self.betting_table_frame = tk.Frame(borderwidth=5)
        self.betting_table = bet_table.BetTable(self.betting_table_frame)

        self.control_panel_frame = tk.Frame()
        self.control_panel = tc.TableControls(self.control_panel_frame)
        # Parented to this frame; not placed by a geometry manager here.
        self.train_panel = con.ControlPanel(self)

        # Layout: the two tables side by side in row 1, controls in row 2.
        self.control_panel_frame.grid(row=2, column=0)
        self.predictions_table_frame.grid(row=1, column=0)
        self.betting_table_frame.grid(row=1, column=1)
        self.master.grid_rowconfigure(0, weight=1)
        self.master.grid_columnconfigure(0, weight=1)
Beispiel #19
0
    def headers(self):
        """
        Return a list of all headers for the site.

        One header is built per ``<a class="teaser">`` element; whitespace
        in title and text is collapsed, and the text is ``None`` when the
        teaser has no ``<p>``.
        """
        soup = BeautifulSoup(self.source(), "html.parser")

        return [
            header.Header(
                " ".join(teaser.h1.get_text().split()),
                " ".join(teaser.p.get_text().split())
                if teaser.p is not None else None,
                "{}{}".format(self.url(), teaser.get("href")),
                False,
            )
            for teaser in soup.find_all("a", class_="teaser")
        ]
Beispiel #20
0
    def headers(self):
        """
        Return a list of all headers for the site.

        One header is built per ``<article class="nyh_teaser">`` element,
        using the ``<h1>`` and first ``<div>`` of its text wrapper.
        """
        soup = BeautifulSoup(self.source(), "html.parser")

        def build(teaser):
            # Title and text live inside the teaser's text wrapper.
            wrapper = teaser.find("div", class_="nyh_teaser__textwrapper")
            return header.Header(
                wrapper.h1.get_text(),
                wrapper.div.get_text(),
                "{}{}".format(self.url(), teaser.a.get("href")),
                False,
            )

        return [
            build(teaser)
            for teaser in soup.find_all("article", class_="nyh_teaser")
        ]
Beispiel #21
0
    def headers(self):
        """
        Return a list of all headers for the site.

        One header is built per ``<div class="text">`` element, using its
        ``<h1>`` and its ``<div class="subheading">``.
        """
        soup = BeautifulSoup(self.source(), "html.parser")

        def build(block):
            lead = block.find("div", class_="subheading")
            return header.Header(
                block.h1.get_text(),
                lead.get_text(),
                "{}{}".format(self.url(), block.a.get("href")),
                False,
            )

        return [build(block) for block in soup.find_all("div", class_="text")]
Beispiel #22
0
    def headers(self):
        """
        Return a list of all headers for the site.

        One header is built per entry of ``data.result.hits`` in the source;
        the last header argument is true when the hit's ``paywall`` value is
        ``"premium"``.
        """
        hits = self.source()["data"]["result"]["hits"]

        return [
            header.Header(
                hit["teaser"]["title"],
                hit["teaser"]["text"],
                "{}{}".format(self.url(), hit["urlPath"]),
                hit["paywall"] == "premium",
            )
            for hit in hits
        ]
Beispiel #23
0
    def __iter__(self):
        """Iterator support (read mode only)

          >>> points = []
          >>> for i in f:
          ...   points.append(i)
          ...   print i # doctest: +ELLIPSIS
          <liblas.point.Point object at ...>
        """
        if self.mode != 0:
            return

        self.at_end = False
        raw = core.las.LASReader_GetNextPoint(self.handle)
        while raw and not self.at_end:
            wrapped = point.Point(handle=raw, copy=True)
            wrapped.set_header(
                lasheader.Header(handle=self._header, copy=False))
            yield wrapped
            raw = core.las.LASReader_GetNextPoint(self.handle)
            if not raw:
                self.at_end = True

        # Once the loop has ended the file is closed and reopened.
        self.close()
        self.open()
Beispiel #24
0
class Controller():
    """Load the settings file and run the enabled pipeline stages.

    ``start`` reads the settings into ``header``, configures logging and
    then runs the crawler and/or the classification engine depending on the
    ``crawler_switch`` / ``classifier_switch`` values.
    """

    # Both objects are class attributes and therefore shared by every
    # Controller instance.
    settings = ConfigParser.ConfigParser()
    header = header.Header()

    def start(self):
        """Read the inputs, set up logging and run the enabled stages."""
        self.read_inputs()
        self.set_logger()

        if self.header.crawler_switch == 1:
            self.run_crawler()
        if self.header.classifier_switch == 1:
            self.run_classification_engine()

        return None

    def set_logger(self):
        """Configure the root logger to write to the configured log file.

        If ``header.log_dir`` is not an existing directory the log goes to
        ``log.txt`` inside ``header.home_dir`` instead.  An unknown
        ``header.level_name`` falls back to ``logging.NOTSET``.  Errors
        raised while configuring are printed, not propagated.
        """
        log_dir = self.header.log_dir
        redirected = not os.path.isdir(log_dir)
        if redirected:
            # Use the fallback directory for real; it used to be computed
            # and then ignored.
            log_dir = self.header.home_dir

        levels = {'debug': logging.DEBUG,
                  'info': logging.INFO,
                  'warning': logging.WARNING,
                  'error': logging.ERROR,
                  'critical': logging.CRITICAL}

        try:
            log_file = 'log.txt' if redirected else self.header.log_file
            level = levels.get(self.header.level_name, logging.NOTSET)
            path = os.path.join(log_dir, log_file)

            # Configure before emitting any warning: logging.warning() on an
            # unconfigured root logger installs a default handler, after
            # which basicConfig(filename=...) does nothing.
            logging.basicConfig(filename=path, level=level)

            if redirected:
                logging.warning(
                    "LOG_DIR_PATH not set properly. "
                    "Logs are redirected to log.txt placed at: %s",
                    log_dir)

            if level == logging.NOTSET:
                logging.warning(
                    "Logging level not set properly. Allowed levels are: "
                    "critical, error, warning, info, debug")

        except Exception as e:
            print(e)
            return None

    def read_inputs(self):
        """Copy the values of the settings file onto ``header``.

        Reading is best-effort: the first missing or malformed option is
        printed and stops the remaining values from being loaded.
        """
        self.settings.read(self.header.settings_file)
        get = self.settings.get
        hdr = self.header
        try:
            # Directories
            section = 'DIRECTORIES'
            hdr.home_dir = get(section, 'HOME_DIR')
            hdr.output_dir = get(section, 'OUTPUT_DIR')
            hdr.log_dir = get(section, 'LOG_DIR')
            hdr.resource_dir = get(section, 'RESOURCE_DIR')

            # Crawler_Inputs
            section = 'CRAWLER_INPUTS'
            hdr.crawler_switch = int(get(section, 'CRAWLER_SWITCH'))
            hdr.site_info_file = get(section, 'SITE_INFO_FILE')
            hdr.keyword_file = get(section, 'KEYWORD_FILE')
            hdr.site_timestamp_pickle_file = get(section,
                                                 'SITE_PICKLE_DUMP_FILE')
            hdr.crawler_output = get(section, 'CRAWLER_OUTPUT')
            hdr.key_word_filering_switch = int(
                get(section, 'KEY_WORD_FILTERING_SWITCH'))

            # This used to assign through ``self.header.self``, which
            # raised AttributeError and silently skipped every setting
            # below.  The attribute keeps its original spelling.
            hdr.artcile_story_pull = get(
                section, 'ARTICLE_STORY_PULL').title()

            # Logging_Inputs
            section = 'LOGGING_INPUTS'
            hdr.log_file = get(section, 'LOG_FILE')
            hdr.level_name = get(section, 'LEVEL_NAME')

            # Classifier_Inputs
            section = 'CLASSIFIER_INPUTS'
            hdr.training_file = get(section, 'TRAINING_FILE_PATH')
            hdr.classifier_switch = int(get(section, 'CLASSIFIER_SWITCH'))

            hdr.label_col_name_risk_classifier = get(
                section, 'LABEL_COL_NAME_RISK_CLASSIFIER')
            hdr.text_field_risk_classifier = get(
                section, 'TEXT_FIELDS_RISK_CLASSIFIER').split(',')
            hdr.risk_classifier_model_file = get(
                section, 'RISK_CLASSIFIER_MODEL_FILE')

            hdr.text_field_topic_classifier = get(
                section, 'TEXT_FIELDS_TOPIC_CLASSIFIER').split(',')
            hdr.label_col_name_topic_classifier = get(
                section, 'LABEL_COL_NAME_TOPIC_CLASSIFIER')
            hdr.topic_classifier_model_file = get(
                section, 'TOPIC_CLASSIFIER_MODEL_FILE')

            hdr.label_col_name_sub_topic_classifier = get(
                section, 'LABEL_COL_NAME_SUB_TOPIC_CLASSIFIER')

            hdr.risk_classifier_ratio_remove_skewness = float(
                get(section, 'RISK_CLASSIFIER_RATIO_REMOVE_SKEWNESS'))

            hdr.classifier_output = get(section, 'CLASSIFIER_OUTPUT')

            # Only ever switched on here; any other value leaves the
            # attribute as it was.
            if "true" in get(section, 'IS_REMOVE_SKEWNESS').lower():
                hdr.isRemoveSkewness = True

        except Exception as e:
            print(e)

        return None

    def run_classification_engine(self):
        """Train all tasks and then run the analysis."""
        analyzer = Analysis_Engine(self.header)
        analyzer.performTrainingForAllTasks()
        analyzer.analyze()
        return None

    def run_crawler(self):
        """Construct a ``Crawler`` with the loaded header."""
        Crawler(self.header)
        return None

    def a(self):
        """Start a ``Pre_Processor`` with all optional steps disabled."""
        # NOTE(review): ``root`` is not defined in the visible code --
        # confirm where it is supposed to come from.
        pre_processor = Pre_Processor(root , file_ids=[],
                                stop_words_removal = False,
                                named_entity_removal = False,
                                hypernom_substitution = False)

        pre_processor.start()
Beispiel #25
0
def simple_files():
    """Return the file entries of ``simple.cab`` as a list."""
    archive = read_cabextract_cab('simple.cab')
    cab_header = header.Header(archive)
    entries = cabfile.create_files(cab_header, archive)
    return list(entries)
Beispiel #26
0
 def test_header_news_dd_menu(self):
     """check news submenu and part of buttons in it"""
     site_header = header.Header(self.driver)
     for link_name in site_header.submenu_links:
         # hover the news button before each click on a submenu entry
         site_header.newsbtn_mouse_on()
         landed_on = site_header.news_submenu_click(link_name)
         assert landed_on == site_header.submenu_links[link_name]
Beispiel #27
0
def simple_header():
    """Return a ``header.Header`` built from the contents of 'simple.cab'.

    The cabinet data is obtained through ``read_cabextract_cab``.
    """
    cabinet = read_cabextract_cab('simple.cab')
    return header.Header(cabinet)
Beispiel #28
0
 def test_(self):
     """Construct a ``header.Header`` from ``self.driver``; nothing is asserted."""
     header.Header(self.driver)
Beispiel #29
0
    def parseWikiPage(self, content):
        '''Parse the wikitext of a Wiktionary page into our object structure.

        The text is handled in two passes.  The first pass cuts it into
        blocks, one per header line, each remembered as a dictionary holding
        the header object, the lines found under that header and the
        language / part-of-speech context that was active at that point.
        Page-level information met on the way (the {wikipedia} template,
        categories, simple interwiki links) is registered right away through
        addLink() and addCategory().  Lines that only serve table formatting
        in translation sections are taken out.

        The second pass interprets the blocks: a part-of-speech block yields
        a Term plus one Meaning per definition, which are added to the Entry
        of the block's language; 'trans', 'syn' and 'ant' blocks are linked
        to a Meaning by scoring their concise definition against the known
        definitions.

        content -- the wikitext of the whole page, as a single string

        Nothing is returned; everything is stored on the page object.
        '''

        templist = []
        context = {}
        aheader = ''  # stays '' until the first header line has been seen
        splitcontent = []
        content = content.split('\n')
        for line in content:
            # Let's get rid of line breaks and extraneous white space
            line = line.replace('\n', '').strip()
            # Let's start by looking for general stuff, that provides information which is
            # interesting to store at the page level
            if '{wikipedia}' in line.lower():
                self.addLink('wikipedia')
                continue
            if '[[category:' in line.lower():
                category = line.split(':')[1].replace(']', '')
                self.addCategory(category)
                continue
            if '|' not in line:
                bracketspos = line.find('[[')
                colonpos = line.find(':')
                if bracketspos != -1 and colonpos != -1 and bracketspos < colonpos:
                    # This seems to be an interwikilink
                    # If there is a pipe in it, it's not a simple interwikilink
                    linkparts = line.replace(']', '').replace('[',
                                                              '').split(':')
                    lang = linkparts[0]
                    linkto = linkparts[1]
                    # Only prefixes of 2 or 3 characters are taken for language codes
                    if len(lang) > 1 and len(lang) < 4:
                        self.addLink(lang + ':' + linkto)
                    continue
            # store empty lines literally, this is necessary for the blocks we don't parse
            # and will return literally
            if len(line) < 2:
                templist.append(line)
                continue
            # The line was stripped above and holds at least two characters,
            # so both indexes are safe.
            # NOTE(review): the closing '=' is looked for in the last-but-one
            # position rather than the last one -- confirm this is intended.
            if (line[0] == '=' and line[-2] == '=') or (
                    '{{-' in line and '-}}' in line):
                # When a new header is encountered, it is necessary to store the information
                # encountered under the previous header.
                if templist and aheader:
                    tempdictstructure = {
                        'text': templist,
                        'header': aheader,
                        'context': copy.copy(context),
                    }
                    templist = []
                    splitcontent.append(tempdictstructure)
                aheader = header.Header(line)
                if aheader.type == u'lang':
                    context['lang'] = aheader.contents
                if aheader.type == u'pos':
                    if not 'lang' in context:
                        # This entry lacks a language indicator,
                        # so we assume it is the same language as the Wiktionary we're working on
                        context['lang'] = self.wikilang
                    context['pos'] = aheader.contents

            else:
                # It's not a header line, so we add it to a temporary list
                # containing content lines.
                # Before the first header aheader is still the empty string,
                # which has no 'contents' attribute, hence the extra check.
                if aheader and aheader.contents == u'trans':
                    # Under the translations header there is quite a bit of stuff
                    # that's only needed for formatting, we can just skip that
                    # and go on processing the next line
                    lower = line.lower()
                    if '{top}' in lower: continue
                    if '{mid}' in lower: continue
                    if '{bottom}' in lower: continue
                    if '|-' in line: continue
                    if '{|' in line: continue
                    if '|}' in line: continue
                    if 'here-->' in lower: continue
                    if 'width=' in lower: continue
                    if '<!--left column' in lower: continue
                    if '<!--right column' in lower: continue

                templist.append(line)

        # Let's not forget the last block that was encountered.  This has to
        # happen once, after all lines were read; a block without a header
        # cannot be interpreted below and is therefore not stored.
        if templist and aheader:
            tempdictstructure = {
                'text': templist,
                'header': aheader,
                'context': copy.copy(context),
            }
            splitcontent.append(tempdictstructure)

        # make sure variables are defined before they are used
        gender = sample = plural = diminutive = label = definition = ''
        number = 1
        diminutive = False
        examples = []
        for contentblock in splitcontent:
            headercontent = contentblock['header'].contents

            # Now we parse the text blocks.
            # Let's start by describing what to do with content found under the POS header
            if contentblock['header'].type == u'pos':
                flag = False
                for line in contentblock['text']:
                    if line[:3] == "'''":
                        # This seems to be an ''inflection line''
                        # It can be built up like this: '''sample'''
                        # Or more elaborately like this: '''staal''' ''n'' (Plural: [[stalen]],     diminutive: [[staaltje]])
                        # Or like this: {{en-infl-reg-other-e|ic|e}}
                        # Let's first get rid of parentheses and brackets:
                        line = line.replace('(', '').replace(')', '').replace(
                            '[', '').replace(']', '')
                        # Then we can split it on the spaces
                        for part in line.split(' '):
                            if not flag and part[:3] == "'''":
                                # OK, so this should be an example of the term we are describing
                                # maybe it is necessary to compare it to the title of the page
                                sample = part.replace("'", '').strip()
                            if sample:
                                # NOTE(review): the loop variable 'subpart' is
                                # never used, 'part' is inspected instead.
                                # Left as it was: switching to 'subpart' would
                                # also let words such as 'Plural:' set number.
                                for subpart in line.split(' '):
                                    maybegender = part.replace(
                                        "'",
                                        '').replace("}",
                                                    '').replace("{",
                                                                '').lower()
                                    if maybegender in ('m', 'f', 'n', 'c'):
                                        gender = maybegender
                                    if maybegender[:1] == 'p':
                                        number = 2
                                    if maybegender[:3] == 'dim':
                                        diminutive = True
                            if part.replace("'", '')[:2].lower() == 'pl':
                                flag = 'plural'
                            if part.replace("'", '')[:3].lower() == 'dim':
                                flag = 'diminutive'
                            if flag == 'plural':
                                plural = part.replace(',',
                                                      '').replace("'",
                                                                  '').strip()
                            if flag == 'diminutive':
                                diminutive = part.replace(',', '').replace(
                                    "'", '').strip()
                    if line[:2] == "{{":
                        # Let's get rid of accolades:
                        line = line.replace('{', '').replace('}', '')
                        # The dash separated pieces of the template name are
                        # not used for anything yet, so the template is not
                        # taken apart here; reading a fixed number of pieces
                        # failed on short templates such as {{en-noun}}.
                    if sample:
                        # We can create a Term object
                        # TODO which term object depends on the POS
                        if headercontent == 'noun':
                            theterm = term.Noun(
                                lang=contentblock['context']['lang'],
                                term=sample,
                                gender=gender,
                                number=number,
                                diminutive=diminutive)
                        if headercontent == 'verb':
                            theterm = term.Verb(
                                lang=contentblock['context']['lang'],
                                term=sample)
                        sample = ''
                    if line[:1].isdigit():
                        # Somebody didn't like automatic numbering and added static numbers
                        # of their own. Let's get rid of them
                        while line[:1].isdigit():
                            line = line[1:]
                        # and replace them with a hash, so the following if block picks it up
                        line = '#' + line
                    if line[:2] == "#:":
                        # This is an example for the preceding definition.
                        # It has to be recognised before the plain '#' test,
                        # which would otherwise take it for a new definition.
                        examples.append(line[2:])
                    elif line[:1] == "#":
                        # This probably is a definition
                        # If we already had a definition we need to store that one's data
                        # in a Meaning object and make that Meaning object part of the Page object
                        if definition:
                            # NOTE(review): theterm is only bound once an
                            # inflection line was seen under a noun or verb
                            # header -- confirm it cannot be missing here.
                            ameaning = meaning.Meaning(term=theterm,
                                                       definition=definition,
                                                       label=label,
                                                       examples=examples)

                            # sample
                            # plural and diminutive belong with the Noun object
                            # comparative and superlative belong with the Adjective object
                            # conjugations belong with the Verb object

                            # Reset everything for the next round
                            sample = plural = diminutive = label = definition = ''
                            examples = []

                            if not contentblock['context'][
                                    'lang'] in self.entries:
                                # If no entry for this language has been foreseen yet
                                # let's create one
                                anentry = entry.Entry(
                                    contentblock['context']['lang'])
                                # and add it to our page object
                                self.addEntry(anentry)
                            # Then we can easily add this meaning to it.
                            # NOTE(review): anentry is only (re)bound when a
                            # new Entry is created above; an entry that
                            # already exists is never looked up.
                            anentry.addMeaning(ameaning)

                        pos = line.find('<!--')
                        pos2 = line.find('-->')
                        if -1 < pos < 4 and pos2 > pos:
                            # A html comment at the beginning of the line means this entry already has disambiguation labels, great
                            label = line[pos + 4:pos2]
                            # The definition starts after the three
                            # characters of the closing '-->'
                            definition = line[pos2 + 3:].strip()
                        else:
                            definition = line[1:].strip()
            # Make sure we store the last definition
            # NOTE(review): unlike the block above, the meaning is only added
            # when the entry had to be created, and definition is not reset
            # afterwards -- verify against the intended behaviour.
            if definition:
                ameaning = meaning.Meaning(term=theterm,
                                           definition=definition,
                                           label=label,
                                           examples=examples)
                if not contentblock['context']['lang'] in self.entries:
                    # If no entry for this language has been foreseen yet
                    # let's create one
                    anentry = entry.Entry(contentblock['context']['lang'])
                    # and add it to our page object
                    self.addEntry(anentry)
                    # Then we can easily add this meaning to it.
                    anentry.addMeaning(ameaning)

            winner = False  # This is going to contain the Meaning object which has the Definition which matches the Concisedef of the entry we are working on right now
            if headercontent == 'trans' or headercontent == 'syn' or headercontent == 'ant':
                # On the English Wiktionary we will find concisedefs here to link definitions to the content of these sections, but only if there is more than one definition.
                # Written so that it prints the same text under Python 2 and 3
                print("number of meanings: %d" % len(anentry.meanings.keys()))
                concisedefclean = ''
                for line in contentblock['text']:
                    if line[:3] == "'''":
                        # This seems to be a line containing a concisedef
                        concisedef = line.replace("'''", '').strip()
                        concisedefclean = concisedef.replace("(", '').replace(
                            ")",
                            '').replace("'",
                                        '').replace(":",
                                                    '').replace(".",
                                                                '').lower()
                    if line[:2] == "*(":
                        # This seems to be a line containing a concisedef
                        pos = line.find(')')
                        concisedef = line[2:pos].strip()
                        concisedefclean = concisedef.replace("(", '').replace(
                            ")",
                            '').replace("'",
                                        '').replace(":",
                                                    '').replace(".",
                                                                '').lower()
                        restofline = line[pos + 2:].strip()
                    # Now we have this concisedef, it's worthless if it can't
                    # be matched to a definition in order to know to what
                    # meaning the following content belongs to

                    # Let's start by creating a list of meanings for the entry
                    # we're working on
                    if concisedefclean:
                        highest = 0
                        # Fall back on the first meaning when nothing scores
                        winner = anentry.meanings[contentblock['context']
                                                  ['pos']][0]
                        for anothermeaning in anentry.meanings[
                                contentblock['context']['pos']]:
                            score = 0
                            for word in concisedefclean.split():
                                definition = anothermeaning.definition.replace(
                                    "(", '').replace(")", '').replace(
                                        "'", '').replace(":", '').replace(
                                            ".", '').replace("#", '').lower()
                                # A whole-word hit counts twice for words
                                # longer than two characters
                                if len(
                                        word
                                ) > 1 and ' ' + word + ' ' in definition:
                                    score += 1
                                if len(word) > 2 and word in definition:
                                    score += 1
                            if score > highest:
                                highest = score
                                winner = anothermeaning
                        winner.setConciseDef(concisedef)
                        # TODO for 'trans' sections: we have to find a way to
                        # read the rest of the lines until the next ConciseDef
                        # into a structure, that can be processed later on. In
                        # contrast to a list of synonyms where the synonyms are
                        # on the rest of the lines, translations are on the
                        # following lines.
                        # It's also possible that there is no concisedef and
                        # that the translation's block simply starts... or that
                        # there are numbers instead of concisedefs.

                    # NOTE(review): winner is still False and restofline is
                    # unbound until a concisedef line was met in this block.
                    if headercontent == 'syn':
                        winner.parseSynonyms(restofline)
                    if headercontent == 'trans':
                        winner.parseTranslations(line)
Beispiel #30
0
    def handle(self, pkt: Packet):
        """React to one incoming packet according to its header flags.

        * SYN: answer with a SYNACK carrying a random stream id (0..65535);
          the window is set to the smaller of our window and the peer's, the
          connection is marked as open and the peer's SYN number remembered.
        * SYNACK: acknowledge it (received outside the opening handshake).
        * FIN: answer with a FINACK, remember the FIN number, mark the
          connection as closed and note that a FIN was received.
        * FINACK: acknowledge it (received outside the closing handshake).
        * ACK: drop the acknowledged packet from ``self.sent``; an unknown
          number is only reported on stdout.
        * anything else: store the packet in ``self.received`` under its SYN
          number and acknowledge it.

        Replies are queued on ``self.send_buf``.  Returns ``None``.
        """

        def control_packet(**fields):
            # All replies built here carry an empty payload.
            return Packet(data=b"", header=header.Header(**fields))

        incoming = pkt.header
        kind = incoming.flags

        # handle a SYN packet
        if kind == SYN:
            print("received SYN")
            reply = control_packet(ack_number=incoming.SYN_number + 1,
                                   flags=header.SYNACK,
                                   stream_id=random.randint(0, 65535))
            # Adopt the peer's window unless it is larger than ours.
            if not incoming.windows > self.window_size:
                self.window_size = incoming.windows
            reply.header.windows = self.window_size
            reply.header.genchecksum(reply.data)
            self.send_buf.put(reply, timeout=self.timeout)
            self.cur_stream_id = reply.header.stream_id
            self.connected = True
            self.original_syn = incoming.SYN_number
            print(self.connected)
            print(self.cur_stream_id)
            return

        # a SYN ACK arriving outside of our normal opening handshake is acked
        if kind == SYNACK:
            reply = control_packet(ack_number=incoming.SYN_number + 1,
                                   flags=header.ACK,
                                   windows=self.window_size,
                                   stream_id=self.cur_stream_id)
            reply.header.genchecksum(reply.data)
            self.send_buf.put(reply, timeout=self.timeout)
            return

        if kind == FIN:
            reply = control_packet(syn_number=incoming.SYN_number + 1,
                                   flags=header.FINACK,
                                   windows=self.window_size,
                                   ack_number=incoming.SYN_number,
                                   stream_id=self.cur_stream_id)
            reply.header.genchecksum(reply.data)
            self.send_buf.put(reply, timeout=self.timeout)
            print("sending finack")
            self.fin_num = incoming.SYN_number
            self.connected = False
            self.got_fin = True
            print("server connection:")
            print(self.connected)
            return

        # a FINACK arriving outside of our normal closing handshake is acked
        if kind == FINACK:
            reply = control_packet(stream_id=self.cur_stream_id,
                                   ack_number=incoming.SYN_number,
                                   flags=ACK,
                                   windows=incoming.windows)
            reply.header.genchecksum(reply.data)
            self.send_buf.put(reply)
            return

        if kind == ACK:
            # The acknowledged packet is filed under ACK number minus one.
            try:
                self.sent.pop(incoming.ACK_number - 1)
            except KeyError:
                print("keyError on ACK num" + str(incoming.ACK_number))
            return

        # Any other packet: keep it and acknowledge it.
        reply = control_packet(ack_number=incoming.SYN_number + 1,
                               stream_id=self.cur_stream_id,
                               windows=self.window_size,
                               flags=ACK)
        self.received.update({incoming.SYN_number: pkt})
        reply.header.genchecksum(reply.data)
        self.send_buf.put(reply)