def count_words(sentence):
    """Count how often each word occurs in *sentence*, ignoring case.

    The sentence is lower-cased first.  A word is a run of characters that
    the module-level ``isalnum`` accepts; an apostrophe is kept inside a word
    only when the characters on both sides satisfy ``isalpha`` (``don't``).
    Every other character that is neither alphanumeric nor whitespace acts
    as a separator.

    Parameters:
        sentence - the text to analyse

    Returns:
        dict mapping each word to its number of occurrences; an input that
        contains no words yields an empty dict
    """
    text = sentence.lower()
    last = len(text) - 1

    # Build the cleaned text in one pass instead of re-slicing the string
    # for every character that has to be replaced.
    cleaned = []
    for idx, ch in enumerate(text):
        if isalnum(ch) or isspace(ch):
            cleaned.append(ch)
        elif (ch == "'" and 0 < idx < last
                and isalpha(text[idx - 1]) and isalpha(text[idx + 1])):
            # apostrophe inside a word, e.g. "don't"
            cleaned.append(ch)
        else:
            cleaned.append(" ")

    # split() without arguments collapses runs of whitespace and never
    # produces an empty token, so blank input yields no words at all.
    counts = {}
    for word in "".join(cleaned).split():
        counts[word] = counts.get(word, 0) + 1
    return counts
def compute_score_en(self, s):
    """Score how far the letter distribution of *s* is from ``self.freq_en``.

    For every ``(key, expected)`` pair in ``self.freq_en`` the squared
    difference between ``expected`` and the observed share of that key among
    the alphabetic elements of *s* is added to the score.  Elements rejected
    by ``isgoodchar`` add a penalty of ``(300 * nbad / len(s)) ** 2``.  When
    *s* has no alphabetic element at all, a flat 1000 is added instead of the
    frequency term.

    Parameters:
        s - sequence to score; its elements must support ``c | 32``
            (presumably bytes, i.e. ints -- confirm against callers)

    Returns:
        Attributize with ``score`` (lower means closer to ``freq_en``) and
        ``nbad`` (number of elements rejected by ``isgoodchar``)
    """
    x = DictWithDefault(lambda: 0)
    nalpha = 0
    nbad = 0
    n = len(s)

    for c in s:
        # NOTE(review): if the elements are ints, ``c == '_'`` can never be
        # true; the comparison is kept exactly as it was.
        if isspace(c) or c == '_':
            continue
        if isalpha(c):
            nalpha += 1
            # setting bit 5 folds ASCII upper-case onto lower-case
            x[c | 32] += 1
        elif not isgoodchar(c):
            nbad += 1

    score = 0
    if nalpha == 0:
        score += 1000
    else:
        for c, v in self.freq_en.items():
            score += (v - x[c] / nalpha) ** 2

    # Guard the ratio: an empty input previously raised ZeroDivisionError.
    if n:
        score += (300 * nbad / n) ** 2

    return Attributize(score=score, nbad=nbad)
def get_width_in_text_space(self) -> Decimal:
    """
    Return the total width (in text space) of the glyphs in this GlyphLine.

    Each glyph contributes ``width * font_size * 0.001``; a glyph whose
    unicode string is a single whitespace character additionally gets the
    word spacing.  That amount is multiplied by ``horizontal_scaling / 100``
    and the character spacing is added afterwards.
    """
    total: Decimal = Decimal(0)

    for glyph in self._glyphs:
        advance = glyph.width * self._font_size * Decimal(0.001)

        # word spacing only applies to single-character whitespace glyphs
        is_single_space = len(glyph.unicode_str) == 1 and isspace(glyph.unicode_str)
        if is_single_space:
            advance = advance + self._word_spacing

        # horizontal scaling is expressed as a percentage
        advance = advance * (self._horizontal_scaling / Decimal(100))

        # character spacing follows every glyph ...
        advance = advance + self._character_spacing
        total = total + advance

    # ... but a string of N characters only has N-1 gaps, so take one back
    return total - self._character_spacing
def _recv(self): """Receive a message from the client. Return message (str) or None if failure """ RECV_SIZE = 1024 try: while (True): m = self._csocket.recv(RECV_SIZE) if not m: logging.error('failed to recv message, client disconnected') return None # Decode the incoming data as utf-8 ignoring any # control characters m = bytes([ b if ascii.isgraph(b) or ascii.isspace(b) else 0xff for b in m ]) self._recv_str = self._recv_str + m.decode('utf-8', 'ignore') if self._recv_str.find('\n') >= 0 or self._recv_str.find('\r') >= 0: lines = self._recv_str.splitlines() self._recv_str = '\n'.join(lines[1:]) return lines[0] except Exception as ex: logging.error('failed to recv message to client {0}: {1}'.format(self._csocket.getpeername(), ex)) return False return True
def processInput(self, key):
    """Apply a key code to ``self.text``.

    A printable or whitespace key is appended to the text and DEL removes
    the last character.  Returns True when the key was handled, False
    otherwise.
    """
    accepted = ascii.isprint(key) or ascii.isspace(key)
    if accepted:
        self.text += chr(key)
        return True

    if key != ascii.DEL:
        return False

    # DEL: drop the last character (a no-op on empty text)
    self.text = self.text[:-1]
    return True
def processInput(self, key):
    """Feed one key code into the text buffer.

    Printable and whitespace keys are appended to ``self.text``; DEL erases
    the final character.  The return value tells the caller whether the key
    was consumed.
    """
    consumed = True
    if ascii.isprint(key) or ascii.isspace(key):
        self.text = "".join((self.text, chr(key)))
    elif key == ascii.DEL:
        # slicing keeps this safe when the text is already empty
        self.text = self.text[:-1]
    else:
        consumed = False
    return consumed
def do(self):
    """Kill from the reader position to the end of the line.

    When everything between the position and the end of the line is
    whitespace (or there is nothing at all), the kill extends one position
    further, past the value returned by ``eol()``.
    """
    reader = self.reader
    buf = reader.buffer
    end = reader.eol()
    start = reader.pos

    only_blank = all(ascii.isspace(ch) for ch in buf[start:end])
    if only_blank:
        end += 1
    self.kill_range(start, end)
def alpha_to_unicode(alpha):
    '''
    Convert an alpha to a Unicode character.

    The upper-cased input is looked up in ``mapping`` when it is printable
    ASCII and its index is added to ``UNICODE_BRAILLE_BASE``.  Non-printable
    whitespace is returned unchanged, anything else becomes the character at
    ``UNICODE_BRAILLE_BASE`` itself.  The integer 0 is returned when a
    ValueError is raised along the way (presumably by ``mapping.index`` for
    an unknown character).
    '''
    upper = alpha.upper()
    try:
        # isprint() includes ASCII space, 0x20
        if ASCII.isprint(upper):
            return chr(UNICODE_BRAILLE_BASE + mapping.index(upper))
        if ASCII.isspace(upper):
            return upper
        return chr(UNICODE_BRAILLE_BASE)
    except ValueError:
        return 0
def search_abilities(stdscr):
    """Run the interactive search loop on the curses window *stdscr*.

    Alphanumeric and whitespace keys extend the search text and
    KEY_BACKSPACE shortens it; both re-run ``search`` and reset the
    selection.  KEY_UP / KEY_DOWN move the selection within the result
    list.  Enter, and any other multi-character key name (including an
    arrow key that cannot move any further), shows the selected result via
    ``render_result`` or beeps when there are no results.  The screen is
    redrawn with ``render_static`` after every key.  The loop never returns.
    """
    _rows, _cols = stdscr.getmaxyx()
    curses.noecho()
    # Get curses to do key conversion
    stdscr.keypad(True)

    def show_selected_or_beep(results, index):
        # open the highlighted entry, or signal that there is nothing to open
        if len(results) > 0:
            render_result(stdscr, results[index])
        else:
            curses.beep()

    query = ""
    cursor = 0
    matches = search(query)
    render_static(stdscr, query, matches, cursor)

    while True:
        key = stdscr.getkey()
        # special keys arrive as multi-character names such as "KEY_UP"
        is_named_key = len(key) > 1

        if is_named_key and key == "KEY_BACKSPACE":
            query = query[:-1]
            matches = search(query)
            cursor = 0
        elif is_named_key and key == "KEY_DOWN" and cursor < len(matches) - 1:
            cursor += 1
        elif is_named_key and key == "KEY_UP" and cursor > 0:
            cursor -= 1
        elif is_named_key or key == "\n":
            show_selected_or_beep(matches, cursor)
        elif ascii.isalnum(key) or ascii.isspace(key):
            query += key
            matches = search(query)
            cursor = 0

        render_static(stdscr, query, matches, cursor)
def guessParsing(file1):
    """Guess the format of the open text file *file1* and parse it.

    The first 10000 characters decide whether the file is binary and how
    many of its lines contain a comma.  If one of the first 20 lines
    produced by ``Lines`` starts with ``{`` or ``[`` the text from that line
    on is parsed as JSON, first as-is and then after a regex clean-up that
    adds missing quotes.  Otherwise (or when both JSON attempts fail) the
    lines are read as comma separated values when more than half of the
    first lines contain a comma, else as space separated values, and every
    cell is passed through ``parseSingle``.

    Parameters:
        file1 - a readable, seekable file object

    Returns:
        (parsed_as, data): ``parsed_as`` is a human readable description of
        the detected format; ``data`` is None for binary files, [] for empty
        files and the parsed content otherwise
    """
    # Look at the lines in the first 10000 chars for clues on the type.
    # If there are unprintable characters then binary
    if not all(isgraph(c) or isspace(c) for c in file1.read(10000)):
        print("Binary file")
        parsed_as = 'Binary file'
        return parsed_as, None

    file1.seek(0)
    firstlines = file1.read(10000).splitlines()
    if not firstlines:
        # A completely empty file used to raise ZeroDivisionError in the
        # percentage below before the "Empty file" branch was reached.
        print("Empty file")
        parsed_as = 'Empty file'
        return parsed_as, []

    percentContainingComma = sum(1 for line in firstlines if ',' in line) * 100 / len(firstlines)

    file1.seek(0)
    lines = list(Lines(file1.read().splitlines()))
    if len(lines) == 0:
        print("Empty file")
        parsed_as = 'Empty file'
        return parsed_as, []

    beginsWithBrackets = [line.startswith('{') or line.startswith('[') for line in lines[0:20]]
    if any(beginsWithBrackets):
        text = ' '.join(lines[beginsWithBrackets.index(True):])
        # drop trailing commas in front of a closing bracket
        text = re.sub(r', *([\]\}])', '\\1', text)
        # several top-level maps / arrays in a row: wrap them in one array
        if re.search(r'} *{', text) is not None:
            print("List of json maps")
            text = re.sub(r'} *{', '},{', text)
            text = '[' + text + ']'
        if re.search(r'\] *\[', text) is not None:
            print("List of json maps")
            text = re.sub(r'\] *\[', '],[', text)
            text = '[' + text + ']'
        try:
            print(text)
            ret = json.loads(text)
            print("Valid JSON")
            parsed_as = 'Valid JSON'
            return parsed_as, ret
        except Exception:
            # best effort: fall through to the repair attempt below
            pass

        # add the quotes that are missing around bare tokens, then remove
        # them again around numbers
        text = re.sub(r'([\[\{,]) *([^" ])', '\\1"\\2', text)
        text = re.sub(r'([^" ]) *([\]\},])', '\\1"\\2', text)
        text = re.sub(r'"([0-9]+\.?[0-9]*e?[0-9]*)"', '\\1', text)
        try:
            ret = json.loads(text)
            print("Almost valid JSON")
            parsed_as = 'Almost valid JSON (multiple entries not explicitly in an array)'
            return parsed_as, ret
        except Exception:
            # best effort: fall back to the CSV / SSV readers
            pass

    if percentContainingComma > 50:
        print("CSV")
        parsed_as = 'CSV (does not begin with bracket, most lines contain a comma)'
        ret = csv.reader(lines, delimiter=',')
    else:
        parsed_as = 'SSV (does not begin with bracket, most lines do not contain a comma)'
        print("SSV")
        ret = csv.reader(lines, delimiter=' ', skipinitialspace=True)

    ret = [[parseSingle(i) for i in row] for row in ret]
    # unwrap a single row, or a single column, into a flat list
    if len(ret) == 1:
        ret = ret[0]
    elif all([len(row) == 1 for row in ret]):
        ret = [row[0] for row in ret]
    return parsed_as, ret
def isgoodchar(x):
    """Return a true value when *x* is a whitespace or a graphic character."""
    is_blank = isspace(x)
    if is_blank:
        return is_blank
    return isgraph(x)
def guessParsing(file1):
    """Guess the format of the open text file *file1* and parse it.

    The first 10000 characters decide whether the file is binary and how
    many of its lines contain a comma.  If one of the first 20 lines
    produced by ``Lines`` starts with ``{`` or ``[`` the text from that line
    on is parsed as JSON, first as-is and then after a regex clean-up that
    adds missing quotes.  Otherwise (or when both JSON attempts fail) the
    lines are read as comma separated values when more than half of the
    first lines contain a comma, else as space separated values, and every
    cell is passed through ``parseSingle``.

    Parameters:
        file1 - a readable, seekable file object

    Returns:
        (parsed_as, data): ``parsed_as`` is a human readable description of
        the detected format; ``data`` is None for binary files, [] for empty
        files and the parsed content otherwise
    """
    # Look at the lines in the first 10000 chars for clues on the type.
    # If there are unprintable characters then binary
    if not all(isgraph(c) or isspace(c) for c in file1.read(10000)):
        print("Binary file")
        parsed_as = 'Binary file'
        return parsed_as, None

    file1.seek(0)
    firstlines = file1.read(10000).splitlines()
    if not firstlines:
        # A completely empty file used to raise ZeroDivisionError in the
        # percentage below before the "Empty file" branch was reached.
        print("Empty file")
        parsed_as = 'Empty file'
        return parsed_as, []

    percentContainingComma = sum(1 for line in firstlines if ',' in line) * 100 / len(firstlines)

    file1.seek(0)
    lines = list(Lines(file1.read().splitlines()))
    if len(lines) == 0:
        print("Empty file")
        parsed_as = 'Empty file'
        return parsed_as, []

    beginsWithBrackets = [line.startswith('{') or line.startswith('[') for line in lines[0:20]]
    if any(beginsWithBrackets):
        text = ' '.join(lines[beginsWithBrackets.index(True):])
        # drop trailing commas in front of a closing bracket
        text = re.sub(r', *([\]\}])', '\\1', text)
        # several top-level maps / arrays in a row: wrap them in one array
        if re.search(r'} *{', text) is not None:
            print("List of json maps")
            text = re.sub(r'} *{', '},{', text)
            text = '[' + text + ']'
        if re.search(r'\] *\[', text) is not None:
            print("List of json maps")
            text = re.sub(r'\] *\[', '],[', text)
            text = '[' + text + ']'
        try:
            print(text)
            ret = json.loads(text)
            print("Valid JSON")
            parsed_as = 'Valid JSON'
            return parsed_as, ret
        except Exception:
            # best effort: fall through to the repair attempt below
            pass

        # add the quotes that are missing around bare tokens, then remove
        # them again around numbers
        text = re.sub(r'([\[\{,]) *([^" ])', '\\1"\\2', text)
        text = re.sub(r'([^" ]) *([\]\},])', '\\1"\\2', text)
        text = re.sub(r'"([0-9]+\.?[0-9]*e?[0-9]*)"', '\\1', text)
        try:
            ret = json.loads(text)
            print("Almost valid JSON")
            parsed_as = 'Almost valid JSON (multiple entries not explicitly in an array)'
            return parsed_as, ret
        except Exception:
            # best effort: fall back to the CSV / SSV readers
            pass

    if percentContainingComma > 50:
        print("CSV")
        parsed_as = 'CSV (does not begin with bracket, most lines contain a comma)'
        ret = csv.reader(lines, delimiter=',')
    else:
        parsed_as = 'SSV (does not begin with bracket, most lines do not contain a comma)'
        print("SSV")
        ret = csv.reader(lines, delimiter=' ', skipinitialspace=True)

    ret = [[parseSingle(i) for i in row] for row in ret]
    # unwrap a single row, or a single column, into a flat list
    if len(ret) == 1:
        ret = ret[0]
    elif all([len(row) == 1 for row in ret]):
        ret = [row[0] for row in ret]
    return parsed_as, ret
def byte_to_readable(bdata):
    """Render *bdata* as text, one character per byte value.

    Printable and whitespace ASCII values are converted with ``chr``; every
    other value is shown as ``'.'``.

    Parameters:
        bdata - iterable of integer byte values (e.g. ``bytes``)

    Returns:
        str with one character per input value
    """
    # join() builds the result in one pass instead of growing a string with
    # repeated ``+=``
    return ''.join(
        chr(b) if ascii.isprint(b) or ascii.isspace(b) else '.'
        for b in bdata
    )
def read(filename, included=None):
    """
    Function to read a configuration file into a dictionary.

    Everything after a '#' is a comment.  A line whose newline is escaped
    with a backslash is combined with the following line.  A line of the
    form 'include <file>' reads that file recursively and merges its
    options.  'key = value' lines store the value: a value wrapped in double
    quotes is kept as a string without the quotes, a value containing '.' is
    converted to float, a value starting with '0' is read as an octal int,
    any other value as a decimal int; values that fail conversion stay
    strings.  A line holding only a key (longer than one character) stores
    None.

    Parameters:
       filename - the file to read
       included - files previously read (internal)

    Returns:
       dict mapping option names to their values

    Exceptions:
       IOError - when the configuration file cannot be read
    """
    if not included:
        included = []
    if filename in included:
        # already read: prevents endless recursion on circular includes
        return {}
    included.append(filename)

    options = {}
    # the context manager closes the file even when parsing raises
    with open(filename) as conffile:
        while True:
            line = conffile.readline()
            if line == '':
                break

            # remove comments
            if '#' in line:
                line = line[:line.find('#')]

            # combine lines when the newline is escaped with \
            while len(line) > 1 and line[-2] == '\\':
                line = line[:-2] + line[-1]
                continuation = conffile.readline()
                line += continuation
                if continuation == '':
                    break

            line = line.strip()

            # process include statements; the length check keeps a bare
            # "include" line from raising IndexError
            if line.startswith("include") and len(line) > 7 and isspace(line[7]):
                options.update(read(line[8:].strip(), included))
                continue

            # split 'key = value' into key and value and strip results
            # (a real list, so len() also works where map() is lazy)
            pair = [part.strip() for part in line.split('=', 1)]

            # found key and value
            if len(pair) == 2:
                key, val = pair

                # found quoted string?
                if val and val[0] == val[-1] == '"':
                    val = val[1:-1]
                # unquoted, found num?
                elif val:
                    try:
                        if "." in val:
                            val = float(val)
                        elif val[0] == '0':
                            val = int(val, 8)
                        else:
                            val = int(val)
                    except ValueError:
                        # not a number: keep the raw string
                        pass

                # save key and value
                options[key] = val

            # found only key, value = None
            elif len(pair[0]) > 1:
                options[pair[0]] = None

    return options