Ejemplo n.º 1
0
def count_words(sentence):
    """Count how often each word occurs in ``sentence``.

    The sentence is lower-cased first. A word is a run of ASCII letters and
    digits; an apostrophe is kept inside a word only when it sits directly
    between two letters (``don't``). Every other character, including
    whitespace and non-ASCII characters, separates words.

    Parameters:
        sentence - the text to analyse (str)

    Returns:
        dict mapping each word to its number of occurrences, in order of
        first appearance; empty when the sentence contains no words.
    """
    import re
    from collections import Counter

    # The look-behind/look-ahead pair enforces "letter ' letter"; a digit on
    # either side of the apostrophe ends the word instead.
    words = re.findall(
        r"[a-z0-9]+(?:(?<=[a-z])'(?=[a-z])[a-z0-9]+)*",
        sentence.lower(),
    )
    return dict(Counter(words))
Ejemplo n.º 2
0
    def compute_score_en(self, s):
        """Score how far the letter distribution of ``s`` is from ``self.freq_en``.

        The score is the sum of squared differences between each expected
        frequency in ``self.freq_en`` and the observed relative frequency of
        that letter, plus a penalty that grows with the share of characters
        rejected by ``isgoodchar``. Input without any letters gets a flat
        penalty of 1000 instead of the frequency term.

        Parameters:
            s - sequence of character codes to score
                (assumes iteration yields ints, e.g. ``bytes``, because
                letters are folded with ``c | 32`` -- TODO confirm)

        Returns:
            Attributize with ``score`` and ``nbad`` (count of bad characters)
        """
        x = DictWithDefault(lambda: 0)
        nalpha = 0
        nbad = 0
        n = len(s)
        for c in s:
            if isspace(c) or c == '_':
                # separators count neither as letters nor as bad characters
                continue
            if isalpha(c):
                nalpha += 1
                # setting bit 5 folds ASCII upper case onto lower case
                x[c | 32] += 1
            elif not isgoodchar(c):
                nbad += 1

        score = 0
        if nalpha == 0:
            score += 1000
        else:
            for c, v in self.freq_en.items():
                score += (v - x[c] / nalpha)**2
        # Empty input has no bad characters; skip the ratio rather than
        # dividing by zero.
        if n:
            score += (300 * nbad / n)**2
        return Attributize(score=score, nbad=nbad)
Ejemplo n.º 3
0
    def get_width_in_text_space(self) -> Decimal:
        """
        Calculate the width (in text space) of this GlyphLine.

        Each glyph contributes ``width * font_size / 1000``, plus the word
        spacing when the glyph is a single whitespace character, scaled by the
        horizontal scaling percentage, plus the character spacing. Character
        spacing is applied between glyphs only, so it is removed once at the
        end; a line without glyphs has width 0.

        Returns:
            the total width as a Decimal
        """
        w: Decimal = Decimal(0)
        glyph_count: int = 0
        for g in self._glyphs:
            glyph_count += 1

            # Build the 1/1000 factor from a string: Decimal(0.001) would carry
            # the binary floating-point error of the float literal.
            glyph_width_in_text_space = g.width * self._font_size * Decimal(
                "0.001")

            # add word spacing where applicable
            if len(g.unicode_str) == 1 and isspace(g.unicode_str):
                glyph_width_in_text_space += self._word_spacing

            # horizontal scaling
            glyph_width_in_text_space *= self._horizontal_scaling / Decimal(
                100)

            # add character spacing to character_width
            glyph_width_in_text_space += self._character_spacing

            # add character width to total
            w += glyph_width_in_text_space

        # subtract character spacing once (there are only N-1 spacings in a
        # string of N characters); with no glyphs there is nothing to subtract
        if glyph_count:
            w -= self._character_spacing

        return w
Ejemplo n.º 4
0
    def _recv(self):
        """Receive a message from the client.

        Return message (str) or None if failure

        """
        RECV_SIZE = 1024
        try:
            while (True):
                m = self._csocket.recv(RECV_SIZE)
                if not m:
                    logging.error('failed to recv message, client disconnected')
                    return None
                # Decode the incoming data as utf-8 ignoring any
                # control characters
                m = bytes([ b if ascii.isgraph(b) or ascii.isspace(b) else 0xff for b in m ])
                self._recv_str = self._recv_str + m.decode('utf-8', 'ignore')
                if self._recv_str.find('\n') >= 0 or self._recv_str.find('\r') >= 0:
                    lines = self._recv_str.splitlines()
                    self._recv_str = '\n'.join(lines[1:])
                    return lines[0]
        except Exception as ex:
            logging.error('failed to recv message to client {0}: {1}'.format(self._csocket.getpeername(), ex))
            return False
        return True
Ejemplo n.º 5
0
	def processInput(self,key):
		"""Apply one key code to ``self.text``.

		Printable and whitespace ASCII codes are appended, DEL removes the
		last character. Returns True when the key was consumed, else False.
		"""
		if key == ascii.DEL:
			self.text = self.text[:-1]
			return True
		if not (ascii.isprint(key) or ascii.isspace(key)):
			return False
		self.text += chr(key)
		return True
Ejemplo n.º 6
0
 def processInput(self, key):
     """Feed one key code into the text buffer.

     Appends printable/whitespace ASCII characters to ``self.text`` and
     drops the last character on DEL. Returns True if the key was handled,
     False otherwise.
     """
     is_text_char = ascii.isprint(key) or ascii.isspace(key)
     if is_text_char:
         self.text += chr(key)
     elif key == ascii.DEL:
         self.text = self.text[:-1]
     else:
         return False
     return True
Ejemplo n.º 7
0
 def do(self):
     """Kill from the cursor to the end of the current line.

     When everything between the cursor and the end of line is whitespace
     (or there is nothing left), the kill extends one position past the end
     of line -- presumably to take the line break with it.
     """
     reader = self.reader
     buf = reader.buffer
     line_end = reader.eol()
     start = reader.pos
     only_blank = all(ascii.isspace(ch) for ch in buf[start:line_end])
     stop = line_end + 1 if only_blank else line_end
     self.kill_range(start, stop)
Ejemplo n.º 8
0
def alpha_to_unicode(alpha):
    '''
    Convert an alpha to a Unicode character.

    The upper-cased input is looked up in ``mapping`` and its index is added
    to ``UNICODE_BRAILLE_BASE``. Non-printable whitespace is returned
    unchanged (upper-cased); any other non-printable input yields the
    character at ``UNICODE_BRAILLE_BASE``. Returns 0 when a ValueError is
    raised, e.g. when the symbol is not in ``mapping``.
    '''
    symbol = alpha.upper()
    try:
        if not ASCII.isprint(symbol):
            # ASCII space (0x20) is printable, so only tab/newline-like
            # whitespace reaches the pass-through case here
            return symbol if ASCII.isspace(symbol) else chr(UNICODE_BRAILLE_BASE)
        offset = mapping.index(symbol)
        return chr(UNICODE_BRAILLE_BASE + offset)
    except ValueError:
        return 0
Ejemplo n.º 9
0
def search_abilities(stdscr):
    """Run the interactive search screen on ``stdscr``.

    Loops forever reading keys: alphanumeric and whitespace characters extend
    the query, KEY_BACKSPACE shortens it, KEY_UP/KEY_DOWN move the selection
    within the result list, and Enter (or any other special key) renders the
    selected result, beeping when there are no results. The static view is
    redrawn after every key.
    """
    _rows, _cols = stdscr.getmaxyx()
    curses.noecho()
    # Let curses translate special keys into names such as "KEY_UP"
    stdscr.keypad(True)

    query = ""
    cursor = 0
    matches = search(query)
    render_static(stdscr, query, matches, cursor)

    while True:
        key = stdscr.getkey()
        query_changed = False
        open_requested = False

        if len(key) > 1:
            # multi-character names are special keys
            if key == "KEY_BACKSPACE":
                query = query[:-1]
                query_changed = True
            elif key == "KEY_DOWN" and cursor < len(matches) - 1:
                cursor += 1
            elif key == "KEY_UP" and cursor > 0:
                cursor -= 1
            else:
                # includes KEY_DOWN/KEY_UP pressed at the list boundary
                open_requested = True
        elif key == "\n":
            open_requested = True
        elif ascii.isalnum(key) or ascii.isspace(key):
            query += key
            query_changed = True

        if query_changed:
            matches = search(query)
            cursor = 0

        if open_requested:
            if len(matches) > 0:
                render_result(stdscr, matches[cursor])
            else:
                curses.beep()

        render_static(stdscr, query, matches, cursor)
Ejemplo n.º 10
0
def guessParsing(file1):
    """Guess the format of an open text file and parse it accordingly.

    The checks run in this order:
      1. any character in the first 10000 that is neither graphic nor
         whitespace -> binary;
      2. no lines left after ``Lines`` processing -> empty;
      3. one of the first 20 lines starts with ``{`` or ``[`` -> try JSON,
         first after light clean-up, then after quoting bare tokens;
      4. otherwise comma-separated if more than 50% of the leading lines
         contain a comma, else space-separated.

    Parameters:
        file1 - a readable, seekable file object
                (``Lines`` is a project helper; presumably it filters or
                normalises the raw lines -- confirm against its definition)

    Returns:
        (parsed_as, data): a description of the detected format and the
        parsed content. ``data`` is None for binary files and [] for empty
        ones. For CSV/SSV a single row, or a single column, is flattened to a
        plain list.
    """
    # If there are unprintable characters then binary
    if not all(isgraph(c) or isspace(c) for c in file1.read(10000)):
        print("Binary file")
        parsed_as = 'Binary file'
        return parsed_as, None

    # Read the whole file once; the leading 10000 characters give the clues.
    file1.seek(0)
    content = file1.read()
    firstlines = content[:10000].splitlines()

    linesContainingComma = len([1 for line in firstlines if line.find(',') != -1])
    # An empty file has no lines at all: skip the ratio instead of dividing
    # by zero so that the "Empty file" branch below can be reached.
    percentContainingComma = linesContainingComma * 100 / len(firstlines) if firstlines else 0

    lines = list(Lines(content.splitlines()))

    if len(lines) == 0:
        print("Empty file")
        parsed_as = 'Empty file'
        return parsed_as, []

    beginsWithBrackets = [line.startswith('{') or line.startswith('[') for line in lines[0:20]]

    if any(beginsWithBrackets):
        # Join everything from the first bracketed line on into one text
        text = ' '.join(lines[beginsWithBrackets.index(True):])
        # drop trailing commas before a closing bracket
        text = re.sub(r', *([\]\}])', '\\1', text)
        # several top-level values in a row: separate them and wrap in a list
        if re.search(r'} *{', text) is not None:
            print("List of json maps")
            text = re.sub(r'} *{', '},{', text)
            text = '[' + text + ']'
        if re.search(r'\] *\[', text) is not None:
            print("List of json maps")
            text = re.sub(r'\] *\[', '],[', text)
            text = '[' + text + ']'

        try:
            print(text)
            ret = json.loads(text)
            print("Valid JSON")
            parsed_as = 'Valid JSON'
            return parsed_as, ret
        except ValueError:
            # json.loads reports malformed input as ValueError; try the
            # more lenient repair below
            pass

        # quote bare tokens, then unquote anything that looks like a number
        text = re.sub(r'([\[\{,]) *([^" ])', '\\1"\\2', text)
        text = re.sub(r'([^" ]) *([\]\},])', '\\1"\\2', text)
        text = re.sub(r'"([0-9]+\.?[0-9]*e?[0-9]*)"', '\\1', text)
        try:
            ret = json.loads(text)
            print("Almost valid JSON")
            parsed_as = 'Almost valid JSON (multiple entries not explicitly in an array)'
            return parsed_as, ret
        except ValueError:
            # still not JSON: fall through to the delimiter-based parsing
            pass

    if percentContainingComma > 50:
        print("CSV")
        parsed_as = 'CSV (does not begin with bracket, most lines contain a comma)'
        ret = csv.reader(lines, delimiter=',')
    else:
        parsed_as = 'SSV (does not begin with bracket, most lines do not contain a comma)'
        print("SSV")
        ret = csv.reader(lines, delimiter=' ', skipinitialspace=True)

    ret = [[parseSingle(i) for i in row] for row in ret]

    # flatten a single row, or a single column, to a plain list
    if len(ret) == 1:
        ret = ret[0]
    elif all(len(row) == 1 for row in ret):
        ret = [row[0] for row in ret]

    return parsed_as, ret
Ejemplo n.º 11
0
def isgoodchar(x):
    """Return True when ``x`` is a graphic (visible) or whitespace character."""
    if isgraph(x):
        return True
    return isspace(x)
Ejemplo n.º 12
0
def guessParsing(file1):
    """Guess the format of an open text file and parse it accordingly.

    The checks run in this order:
      1. any character in the first 10000 that is neither graphic nor
         whitespace -> binary;
      2. no lines left after ``Lines`` processing -> empty;
      3. one of the first 20 lines starts with ``{`` or ``[`` -> try JSON,
         first after light clean-up, then after quoting bare tokens;
      4. otherwise comma-separated if more than 50% of the leading lines
         contain a comma, else space-separated.

    Parameters:
        file1 - a readable, seekable file object
                (``Lines`` is a project helper; presumably it filters or
                normalises the raw lines -- confirm against its definition)

    Returns:
        (parsed_as, data): a description of the detected format and the
        parsed content. ``data`` is None for binary files and [] for empty
        ones. For CSV/SSV a single row, or a single column, is flattened to a
        plain list.
    """
    # If there are unprintable characters then binary
    if not all(isgraph(c) or isspace(c) for c in file1.read(10000)):
        print("Binary file")
        parsed_as = 'Binary file'
        return parsed_as, None

    # Read the whole file once; the leading 10000 characters give the clues.
    file1.seek(0)
    content = file1.read()
    firstlines = content[:10000].splitlines()

    linesContainingComma = len([
        1 for line in firstlines if line.find(',') != -1
    ])
    # An empty file has no lines at all: skip the ratio instead of dividing
    # by zero so that the "Empty file" branch below can be reached.
    percentContainingComma = (
        linesContainingComma * 100 / len(firstlines) if firstlines else 0
    )

    lines = list(Lines(content.splitlines()))

    if len(lines) == 0:
        print("Empty file")
        parsed_as = 'Empty file'
        return parsed_as, []

    beginsWithBrackets = [
        line.startswith('{') or line.startswith('[') for line in lines[0:20]
    ]

    if any(beginsWithBrackets):
        # Join everything from the first bracketed line on into one text
        text = ' '.join(lines[beginsWithBrackets.index(True):])
        # drop trailing commas before a closing bracket
        text = re.sub(r', *([\]\}])', '\\1', text)
        # several top-level values in a row: separate them and wrap in a list
        if re.search(r'} *{', text) is not None:
            print("List of json maps")
            text = re.sub(r'} *{', '},{', text)
            text = '[' + text + ']'
        if re.search(r'\] *\[', text) is not None:
            print("List of json maps")
            text = re.sub(r'\] *\[', '],[', text)
            text = '[' + text + ']'

        try:
            print(text)
            ret = json.loads(text)
            print("Valid JSON")
            parsed_as = 'Valid JSON'
            return parsed_as, ret
        except ValueError:
            # json.loads reports malformed input as ValueError; try the
            # more lenient repair below
            pass

        # quote bare tokens, then unquote anything that looks like a number
        text = re.sub(r'([\[\{,]) *([^" ])', '\\1"\\2', text)
        text = re.sub(r'([^" ]) *([\]\},])', '\\1"\\2', text)
        text = re.sub(r'"([0-9]+\.?[0-9]*e?[0-9]*)"', '\\1', text)
        try:
            ret = json.loads(text)
            print("Almost valid JSON")
            parsed_as = 'Almost valid JSON (multiple entries not explicitly in an array)'
            return parsed_as, ret
        except ValueError:
            # still not JSON: fall through to the delimiter-based parsing
            pass

    if percentContainingComma > 50:
        print("CSV")
        parsed_as = 'CSV (does not begin with bracket, most lines contain a comma)'
        ret = csv.reader(lines, delimiter=',')
    else:
        parsed_as = 'SSV (does not begin with bracket, most lines do not contain a comma)'
        print("SSV")
        ret = csv.reader(lines, delimiter=' ', skipinitialspace=True)

    ret = [[parseSingle(i) for i in row] for row in ret]

    # flatten a single row, or a single column, to a plain list
    if len(ret) == 1:
        ret = ret[0]
    elif all(len(row) == 1 for row in ret):
        ret = [row[0] for row in ret]

    return parsed_as, ret
Ejemplo n.º 13
0
def byte_to_readable(bdata):
    """Render a byte sequence as text, masking unreadable bytes.

    Printable and whitespace ASCII bytes are kept as characters; every other
    byte is shown as ``'.'``.

    Parameters:
        bdata - iterable of byte values (ints), e.g. ``bytes`` or ``bytearray``

    Returns:
        str with one character per input byte
    """
    # join() builds the result in one pass instead of repeated concatenation
    return ''.join(
        chr(b) if ascii.isprint(b) or ascii.isspace(b) else '.'
        for b in bdata
    )
Ejemplo n.º 14
0
def read(filename, included=None):
    """
    Read a configuration file into a dictionary.

    Everything after ``#`` on a line is a comment. A backslash directly
    before the newline joins the line with the next one. ``include <file>``
    merges the options of another file; a file that was already read is
    skipped, so include cycles terminate. ``key = value`` lines store the
    value: double-quoted values lose their quotes, unquoted values are
    converted to float (when they contain a "."), to int from octal (leading
    "0") or to int, and stay strings when the conversion fails. A line
    without "=" stores the key with value None.

    Parameters:
        filename - the file to read
        included - files previously read (internal)

    Returns:
        dict mapping option names to their values

    Exceptions:
        IOError - when the configuration file cannot be read
    """

    if not included:
        included = []
    if filename in included:
        return {}
    included.append(filename)

    options = {}

    # the context manager closes the file even when parsing raises
    with open(filename) as conffile:

        while True:

            line = conffile.readline()
            if line == '':
                break

            # remove comments
            if '#' in line:
                line = line[:line.find('#')]

            # combine lines when the newline is escaped with \
            while len(line) > 1 and line[-2] == '\\':
                line = line[:-2] + line[-1]
                next_line = conffile.readline()
                line += next_line
                if next_line == '':
                    break

            line = line.strip()

            # process include statements; the length check keeps a bare
            # "include" line from indexing past the end of the string
            if line.startswith("include") and len(line) > 7 and isspace(line[7]):
                options.update(read(line[8:].strip(), included))
                continue

            # split 'key = value' into key and value and strip results
            # (a list, so that len() also works where map() is lazy)
            pair = [part.strip() for part in line.split('=', 1)]

            # found key and value
            if len(pair) == 2:
                key, val = pair

                # found quoted string?
                if val and val[0] == val[-1] == '"':
                    val = val[1:-1]

                # unquoted, found num?
                elif val:
                    try:
                        if "." in val:
                            val = float(val)
                        elif val[0] == '0':
                            val = int(val, 8)
                        else:
                            val = int(val)
                    except ValueError:
                        pass

                # save key and value
                options[key] = val

            # found only key, value = None
            # NOTE(review): "> 1" skips single-character keys as well as
            # empty lines -- confirm that this is intended
            elif len(pair[0]) > 1:
                key = pair[0]
                options[key] = None

    return options