def exercise_basic():
    """Check infix-to-postfix conversion on a fixed table of expressions.

    Each table entry pairs an infix input string with the word values
    expected from ``simple_parser.infix_as_postfix``, in order.  Passing
    ``--verbose`` on the command line prints every input and the postfix
    word values produced for it.

    Raises:
        AssertionError: if the produced postfix word values differ from
            the expected list for any entry.
    """
    tests = [
        ["a", ['a']],
        ["a and b", ['a', 'b', 'and']],
        ["a or b", ['a', 'b', 'or']],
        ["not a or b", ['a', 'not', 'b', 'or']],
        ["not a or b and c", ['a', 'not', 'b', 'c', 'and', 'or']],
        ["not (a or b) and c", ['a', 'b', 'or', 'not', 'c', 'and']],
        ["(not (a or b) and c)", ['a', 'b', 'or', 'not', 'c', 'and']],
        ["not ((a or b) and c)", ['a', 'b', 'or', 'c', 'and', 'not']],
    ]
    verbose = "--verbose" in sys.argv[1:]
    for input_string, expected_result in tests:
        infix = tokenizer.word_iterator(input_string=input_string)
        if verbose:
            # Single-argument print(...) behaves the same as a statement
            # (Python 2) and as a function call (Python 3).
            print(input_string)
        # infix_as_postfix yields (word, word_iterator) pairs; only the
        # words are needed here.
        postfix = [
            word
            for word, word_iterator in simple_parser.infix_as_postfix(infix)]
        postfix_values = [word.value for word in postfix]
        if verbose:
            print(postfix_values)
        # Without this comparison the expected results in the table were
        # never checked, so the exercise could not fail on a wrong answer.
        assert postfix_values == expected_result
def rewrite_parser(
        word_iterator,
        stop_if_parse_stack_is_empty=False,
        stop_word=None,
        expect_nonmatching_closing_parenthesis=False):
    """Rewrite an infix expression into a compact, fully parenthesized string.

    The words produced by ``simple_parser.infix_as_postfix`` are folded on
    a stack of strings:

    * ``not x``                -> ``(!x)``
    * ``a and b``              -> ``(a&b)``
    * ``a or b``               -> ``(a|b)``
    * ``within(radius, expr)`` -> ``@(<radius as %.2f>,<expr>)``
    * ``around(expr, radius)`` -> ``@(<radius as %.2f>,<expr>)``
    * ``for var in expr``      -> ``(for var in <expr>)``
    * any other word is pushed unchanged.

    Items left on the stack at the end are joined left to right with ``&``.

    Args:
        word_iterator: token source; it is consumed, and is also read
            directly for the arguments of ``within``, ``around`` and
            ``for``.
        stop_if_parse_stack_is_empty: forwarded to ``infix_as_postfix``.
        stop_word: forwarded to ``infix_as_postfix``.
        expect_nonmatching_closing_parenthesis: forwarded to
            ``infix_as_postfix``.

    Returns:
        The rewritten expression, or ``""`` if no words were produced.

    Raises:
        AssertionError: if a punctuation or keyword token required by
            ``within``, ``around`` or ``for`` is not the next word.
        RuntimeError: ``"Missing argument."`` if a nested expression is
            empty.
    """

    def pop_expected(words, expected):
        # The pop must happen unconditionally.  It used to live inside an
        # ``assert`` statement, which is removed under ``python -O`` and
        # would leave the token unconsumed, desynchronising the parser.
        found = words.pop().value
        if found != expected:
            raise AssertionError(
                'Expected "%s", found "%s".' % (expected, found))

    result_stack = []
    for word, word_iterator in simple_parser.infix_as_postfix(
            word_iterator=word_iterator,
            stop_if_parse_stack_is_empty=stop_if_parse_stack_is_empty,
            stop_word=stop_word,
            expect_nonmatching_closing_parenthesis=(
                expect_nonmatching_closing_parenthesis)):
        if word.value == "not":
            arg = result_stack.pop()
            result_stack.append("(!%s)" % arg)
        elif word.value in ["and", "or"]:
            # Postfix order: the right operand is on top of the stack.
            rhs = result_stack.pop()
            lhs = result_stack.pop()
            if word.value == "and":
                result_stack.append("(%s&%s)" % (lhs, rhs))
            else:
                result_stack.append("(%s|%s)" % (lhs, rhs))
        elif word.value == "within":
            # within ( <radius> , <expr> )
            pop_expected(word_iterator, "(")
            radius = float(word_iterator.pop().value)
            pop_expected(word_iterator, ",")
            nested_result = rewrite_parser(
                word_iterator=word_iterator,
                expect_nonmatching_closing_parenthesis=True)
            if nested_result == "":
                raise RuntimeError("Missing argument.")
            result_stack.append("@(%.2f,%s)" % (radius, nested_result))
        elif word.value == "around":
            # around ( <expr> , <radius> )
            pop_expected(word_iterator, "(")
            nested_result = rewrite_parser(
                word_iterator=word_iterator,
                stop_word=",")
            if nested_result == "":
                raise RuntimeError("Missing argument.")
            radius = float(word_iterator.pop().value)
            pop_expected(word_iterator, ")")
            result_stack.append("@(%.2f,%s)" % (radius, nested_result))
        elif word.value == "for":
            # for <var> in <expr>
            var = word_iterator.pop().value
            pop_expected(word_iterator, "in")
            nested_result = rewrite_parser(
                word_iterator=word_iterator,
                stop_if_parse_stack_is_empty=True)
            if nested_result == "":
                raise RuntimeError("Missing argument.")
            result_stack.append("(for %s in %s)" % (var, nested_result))
        else:
            result_stack.append(word.value)
    if len(result_stack) == 0:
        return ""
    # Adjacent operands without an explicit operator are combined with "&".
    result = result_stack[0]
    for item in result_stack[1:]:
        result = "(%s&%s)" % (result, item)
    return result
def rewrite_parser(word_iterator,
                   stop_if_parse_stack_is_empty=False,
                   stop_word=None,
                   expect_nonmatching_closing_parenthesis=False):
    """Convert an infix expression into a compact parenthesized string.

    Words come from ``simple_parser.infix_as_postfix`` and are reduced on
    a stack of strings: ``not`` becomes ``(!x)``, ``and`` becomes
    ``(a&b)``, ``or`` becomes ``(a|b)``.  ``within(radius, expr)`` and
    ``around(expr, radius)`` both become ``@(radius,expr)`` with the
    radius formatted as ``%.2f``.  ``for var in expr`` becomes
    ``(for var in expr)``.  Every other word is pushed verbatim.
    Whatever remains on the stack is joined left to right with ``&``.

    Args:
        word_iterator: token source; consumed by the parser and read
            directly for the arguments of ``within``/``around``/``for``.
        stop_if_parse_stack_is_empty: passed through to
            ``infix_as_postfix``.
        stop_word: passed through to ``infix_as_postfix``.
        expect_nonmatching_closing_parenthesis: passed through to
            ``infix_as_postfix``.

    Returns:
        The rewritten string; ``""`` when no words were produced.

    Raises:
        AssertionError: when a required ``(``, ``,``, ``)`` or ``in``
            token is not the next word.
        RuntimeError: ``"Missing argument."`` for an empty nested
            expression.
    """
    binary_symbols = {"and": "&", "or": "|"}

    def require_next(words, expected):
        # Consume first, then validate.  Keeping the pop() inside an
        # ``assert`` would drop it entirely under ``python -O`` and leave
        # the token stream out of step with the parser.
        actual = words.pop().value
        if actual != expected:
            raise AssertionError(
                'Expected "%s", found "%s".' % (expected, actual))

    def require_argument(text):
        # A nested expression that rewrites to "" means nothing was given.
        if text == "":
            raise RuntimeError("Missing argument.")
        return text

    result_stack = []
    for word, word_iterator in simple_parser.infix_as_postfix(
            word_iterator=word_iterator,
            stop_if_parse_stack_is_empty=stop_if_parse_stack_is_empty,
            stop_word=stop_word,
            expect_nonmatching_closing_parenthesis=(
                expect_nonmatching_closing_parenthesis)):
        keyword = word.value
        if keyword == "not":
            operand = result_stack.pop()
            result_stack.append("(!%s)" % operand)
        elif keyword in ("and", "or"):
            # Postfix order: right operand is popped first.
            rhs = result_stack.pop()
            lhs = result_stack.pop()
            result_stack.append(
                "(%s%s%s)" % (lhs, binary_symbols[keyword], rhs))
        elif keyword == "within":
            # within ( <radius> , <expr> )
            require_next(word_iterator, "(")
            radius = float(word_iterator.pop().value)
            require_next(word_iterator, ",")
            nested_result = require_argument(rewrite_parser(
                word_iterator=word_iterator,
                expect_nonmatching_closing_parenthesis=True))
            result_stack.append("@(%.2f,%s)" % (radius, nested_result))
        elif keyword == "around":
            # around ( <expr> , <radius> )
            require_next(word_iterator, "(")
            nested_result = require_argument(rewrite_parser(
                word_iterator=word_iterator,
                stop_word=","))
            radius = float(word_iterator.pop().value)
            require_next(word_iterator, ")")
            result_stack.append("@(%.2f,%s)" % (radius, nested_result))
        elif keyword == "for":
            # for <var> in <expr>
            var = word_iterator.pop().value
            require_next(word_iterator, "in")
            nested_result = require_argument(rewrite_parser(
                word_iterator=word_iterator,
                stop_if_parse_stack_is_empty=True))
            result_stack.append("(for %s in %s)" % (var, nested_result))
        else:
            result_stack.append(keyword)
    if len(result_stack) == 0:
        return ""
    # Operands left side by side are implicitly and-ed together.
    result = result_stack[0]
    for item in result_stack[1:]:
        result = "(%s&%s)" % (result, item)
    return result
def exercise_basic():
    """Run the infix-to-postfix table test.

    Every case is an (infix string, expected postfix word values) pair.
    The infix string is tokenized, converted with
    ``simple_parser.infix_as_postfix`` and the resulting word values are
    compared with the expectation.  With ``--verbose`` on the command
    line the input, the produced values and a blank separator line are
    printed for each case.
    """
    cases = (
        ("a", ['a']),
        ("a and b", ['a', 'b', 'and']),
        ("a or b", ['a', 'b', 'or']),
        ("not a or b", ['a', 'not', 'b', 'or']),
        ("not a or b and c", ['a', 'not', 'b', 'c', 'and', 'or']),
        ("not (a or b) and c", ['a', 'b', 'or', 'not', 'c', 'and']),
        ("(not (a or b) and c)", ['a', 'b', 'or', 'not', 'c', 'and']),
        ("not ((a or b) and c)", ['a', 'b', 'or', 'c', 'and', 'not']),
    )
    be_verbose = "--verbose" in sys.argv[1:]
    for input_string, expected_result in cases:
        infix = tokenizer.word_iterator(input_string=input_string)
        if be_verbose:
            print(input_string)
        # Each yielded item is a (word, word_iterator) pair; keep the word.
        postfix = []
        for pair in simple_parser.infix_as_postfix(infix):
            postfix.append(pair[0])
        produced = [entry.value for entry in postfix]
        if be_verbose:
            print(produced)
        assert produced == expected_result
        if be_verbose:
            # Blank separator line between cases.
            print("")
def selection_parser(self,
                     word_iterator,
                     optional=True,
                     callback=None,
                     stop_word=None,
                     expect_nonmatching_closing_parenthesis=False):
    """Evaluate an atom-selection expression into a selection object.

    Words from ``simple_parser.infix_as_postfix`` are processed on a
    stack.  ``not``/``and``/``or`` apply ``~``/``&``/``|`` to stack
    entries; every other recognised keyword pushes the result of the
    matching ``self.sel_*`` method (or a ``flex.bool`` of length
    ``self.n_seq`` for ``all``/``none``).  Entries left on the stack at
    the end are combined with ``&``.

    Args:
        word_iterator: token source; consumed, and read directly for
            keyword arguments.
        optional: when true, a leading ``optional`` keyword in the
            expression has no effect (the flag is cleared after parsing).
        callback: optional callable invoked as
            ``callback(word=..., word_iterator=..., result_stack=...)``
            for words not handled here; a false return value is treated
            as a syntax error.  When a callback is given, ``protein``,
            ``peptide`` and ``nucleotide`` are left to it.
        stop_word: forwarded to ``infix_as_postfix``.
        expect_nonmatching_closing_parenthesis: forwarded to
            ``infix_as_postfix``.

    Returns:
        The combined selection.  ``None`` if the ``optional`` keyword is
        in effect and the selection is empty or all false.  A
        ``flex.bool(self.n_seq, False)`` if nothing was selected and
        ``optional`` is not in effect.

    Raises:
        RuntimeError: atom selection syntax error (unknown word, bad
            comparison operator, missing or non-numeric value).
        Sorry: ``optional`` used anywhere but once at the beginning.
        AssertionError: too few operands for ``not``/``and``/``or``, or a
            malformed ``within(...)``/``residues_within(...)`` argument
            list.
    """

    def pop_expected(words, expected):
        # The pop has to run unconditionally.  Written as
        # ``assert words.pop().value == expected`` it vanishes under
        # ``python -O`` and the token would never be consumed.
        found = words.pop().value
        if found != expected:
            raise AssertionError(
                'Expected "%s", found "%s".' % (expected, found))

    have_optional = False
    result_stack = []
    for word, word_iterator in simple_parser.infix_as_postfix(
            word_iterator=word_iterator,
            stop_word=stop_word,
            expect_nonmatching_closing_parenthesis=(
                expect_nonmatching_closing_parenthesis)):
        lword = word.value.lower()

        def raise_syntax_error():
            raise RuntimeError(
                'Atom selection syntax error at word "%s".' % lword)

        if lword == "optional":
            if len(result_stack) != 0:
                raise Sorry('"optional" can appear only at the beginning.')
            if have_optional:
                raise Sorry('"optional" can appear only once.')
            have_optional = True
        elif lword == "not":
            assert len(result_stack) >= 1
            arg = result_stack.pop()
            result_stack.append(~arg)
        elif lword in ["and", "or"]:
            assert len(result_stack) >= 2
            # Postfix order: the right operand is on top of the stack.
            rhs = result_stack.pop()
            lhs = result_stack.pop()
            if lword == "and":
                result_stack.append(lhs & rhs)
            else:
                result_stack.append(lhs | rhs)
        elif lword == "all":
            result_stack.append(flex.bool(self.n_seq, True))
        elif lword == "none":
            result_stack.append(flex.bool(self.n_seq, False))
        elif lword == "name":
            result_stack.append(
                self.sel_name(pattern=word_iterator.pop_argument(word.value)))
        elif lword in ["altloc", "altid"]:
            result_stack.append(
                self.sel_altloc(
                    pattern=word_iterator.pop_argument(word.value)))
        elif lword == "resname":
            result_stack.append(
                self.sel_resname(
                    pattern=word_iterator.pop_argument(word.value)))
        elif lword == "chain":
            result_stack.append(
                self.sel_chain_id(
                    pattern=word_iterator.pop_argument(word.value)))
        elif lword in ["resseq", "resid", "resi", "model"]:
            arg = word_iterator.pop_argument(word.value)

            def try_compose_range():
                # Reassemble "start:stop" when the tokenizer delivered it
                # in pieces.  Returns (text, index of the colon), with
                # index -1 when the argument is not a range.
                def is_cont():
                    # True if the follow-up word starts with a digit.
                    if len(arg_cont.value) == 0:
                        return False
                    return "0123456789".find(arg_cont.value[0]) >= 0

                i_colon = arg.value.find(":")
                if i_colon < 0:
                    arg_cont = word_iterator.try_pop()
                    if arg_cont is None:
                        return arg.value, -1
                    if not arg_cont.value.startswith(":"):
                        word_iterator.backup()
                        return arg.value, -1
                    if len(arg_cont.value) == 1:
                        # A lone ":"; the stop value may be the next word.
                        arg_cont = word_iterator.try_pop()
                        if arg_cont is None:
                            return arg.value + ":", len(arg.value)
                        if not is_cont():
                            word_iterator.backup()
                            return arg.value + ":", len(arg.value)
                        return (arg.value + ":" + arg_cont.value,
                                len(arg.value))
                    return arg.value + arg_cont.value, len(arg.value)
                elif i_colon + 1 == len(arg.value):
                    # Argument ends with ":"; try to attach the stop value.
                    arg_cont = word_iterator.try_pop()
                    if arg_cont is not None:
                        if is_cont():
                            return arg.value + arg_cont.value, i_colon
                        word_iterator.backup()
                return arg.value, i_colon

            def try_compose_sequence():
                # Recognise "<start> through <stop>"; returns (None, None)
                # when the next word is not "through".
                arg_next = word_iterator.try_pop()
                if arg_next is None:
                    word_iterator.backup()
                    return None, None
                lnext = arg_next.value.lower()
                if lnext == "through":
                    arg_final = word_iterator.pop_argument(arg_next.value)
                    return arg.value, arg_final.value
                word_iterator.backup()
                return (None, None)

            val, i_colon = try_compose_range()
            if i_colon < 0:
                if lword == "resseq":
                    result_stack.append(self.sel_resseq(pattern=arg))
                elif lword in ["resid", "resi"]:
                    start, stop = try_compose_sequence()
                    if start is None:
                        result_stack.append(self.sel_resid(pattern=arg))
                    else:
                        result_stack.append(
                            self.sel_resid_sequence(start=start, stop=stop))
                else:
                    result_stack.append(self.sel_model_id(pattern=arg))
            else:
                start = val[:i_colon]
                stop = val[i_colon + 1:]
                if lword == "resseq":
                    result_stack.append(
                        self.sel_resseq_range(start=start, stop=stop))
                elif lword in ["resid", "resi"]:
                    result_stack.append(
                        self.sel_resid_range(start=start, stop=stop))
                else:
                    result_stack.append(
                        self.sel_model_id_range(start=start, stop=stop))
        elif lword == "icode":
            result_stack.append(
                self.sel_icode(
                    pattern=word_iterator.pop_argument(word.value)))
        elif lword == "segid":
            result_stack.append(
                self.sel_segid(
                    pattern=word_iterator.pop_argument(word.value)))
        elif lword == "element":
            result_stack.append(
                self.sel_element(
                    pattern=word_iterator.pop_argument(word.value)))
        elif lword == "charge":
            result_stack.append(
                self.sel_charge(
                    pattern=word_iterator.pop_argument(word.value)))
        elif lword == "anisou":
            result_stack.append(self.sel_anisou())
        elif lword == "pepnames":
            result_stack.append(self.sel_pepnames())
        elif (lword == "protein" or lword == "peptide") and callback is None:
            # If there is a callback, these keywords should be processed
            # there; most likely it is pdb_interpretation.
            result_stack.append(self.sel_protein())
        elif lword == "nucleotide" and callback is None:
            result_stack.append(self.sel_nucleotide())
        elif lword == "single_atom_residue":
            result_stack.append(self.sel_single_atom_residue())
        elif lword == "water":
            result_stack.append(self.sel_water())
        elif (lword == "hetero") or (lword == "hetatm"):
            result_stack.append(self.sel_hetero())
        elif (lword == "bfactor") or (lword == "occupancy"):
            op = word_iterator.pop_argument(word.value).value
            if op not in [">", "<", "="]:
                raise_syntax_error()
            arg_next = word_iterator.try_pop()
            if arg_next is None:
                # Input ended right after the operator; report a syntax
                # error instead of failing on ``None.value``.
                raise_syntax_error()
            try:
                val = float(arg_next.value)
            except ValueError:
                raise_syntax_error()
            if lword == "bfactor":
                result_stack.append(self.sel_bfactor(op, val))
            else:
                result_stack.append(self.sel_occupancy(op, val))
        elif ((lword == "within" or lword == 'residues_within')
              and (self.special_position_settings is not None)):
            # within ( <radius> , <selection> )
            pop_expected(word_iterator, "(")
            radius = float(word_iterator.pop().value)
            pop_expected(word_iterator, ",")
            sel = self.selection_parser(
                word_iterator=word_iterator,
                callback=callback,
                expect_nonmatching_closing_parenthesis=True)
            if lword == 'within':
                result_stack.append(
                    self.sel_within(radius=radius, primary_selection=sel))
            elif lword == 'residues_within':
                result_stack.append(
                    self.sel_residues_within(
                        radius=radius, primary_selection=sel))
        elif callback is not None:
            if not callback(word=word,
                            word_iterator=word_iterator,
                            result_stack=result_stack):
                raise_syntax_error()
        else:
            raise_syntax_error()
    # With optional=True the "optional" keyword is accepted but ignored.
    if optional:
        have_optional = False
    if len(result_stack) == 0:
        if have_optional:
            return None
        return flex.bool(self.n_seq, False)
    # Selections given side by side are implicitly and-ed (in place on
    # the first stack entry).
    selection = result_stack[0]
    for result in result_stack[1:]:
        selection &= result
    if have_optional and selection.all_eq(False):
        return None
    return selection
def selection_parser(self,
                     word_iterator,
                     optional=True,
                     callback=None,
                     stop_word=None,
                     expect_nonmatching_closing_parenthesis=False):
    """Evaluate an atom-selection expression into a selection object.

    The words yielded by ``simple_parser.infix_as_postfix`` drive a
    stack machine: ``not``/``and``/``or`` apply ``~``/``&``/``|`` to the
    stack entries, and each recognised selection keyword pushes the
    result of the corresponding ``self.sel_*`` call (``all``/``none``
    push a ``flex.bool`` of length ``self.n_seq``).  Whatever is left on
    the stack is combined with ``&``.

    Args:
        word_iterator: token source; consumed, and read directly for
            keyword arguments.
        optional: when true, a leading ``optional`` keyword in the
            expression has no effect (the flag is cleared after parsing).
        callback: optional callable invoked as
            ``callback(word=..., word_iterator=..., result_stack=...)``
            for words not handled here; a false return value is treated
            as a syntax error.  When a callback is given, ``protein``,
            ``peptide`` and ``nucleotide`` are left to it.
        stop_word: forwarded to ``infix_as_postfix``.
        expect_nonmatching_closing_parenthesis: forwarded to
            ``infix_as_postfix``.

    Returns:
        The combined selection.  ``None`` if the ``optional`` keyword is
        in effect and the selection is empty or all false.  A
        ``flex.bool(self.n_seq, False)`` if nothing was selected and
        ``optional`` is not in effect.

    Raises:
        RuntimeError: atom selection syntax error (unknown word, bad
            comparison operator, missing or non-numeric value).
        Sorry: ``optional`` used anywhere but once at the beginning.
        AssertionError: too few operands for ``not``/``and``/``or``, or a
            malformed ``within(...)`` argument list.
    """

    def pop_expected(words, expected):
        # Consume the token first and validate afterwards.  An
        # ``assert words.pop().value == expected`` is stripped under
        # ``python -O``, which would skip the pop as well.
        found = words.pop().value
        if found != expected:
            raise AssertionError(
                'Expected "%s", found "%s".' % (expected, found))

    have_optional = False
    result_stack = []
    for word, word_iterator in simple_parser.infix_as_postfix(
            word_iterator=word_iterator,
            stop_word=stop_word,
            expect_nonmatching_closing_parenthesis=(
                expect_nonmatching_closing_parenthesis)):
        lword = word.value.lower()

        def raise_syntax_error():
            raise RuntimeError(
                'Atom selection syntax error at word "%s".' % lword)

        if lword == "optional":
            if len(result_stack) != 0:
                raise Sorry('"optional" can appear only at the beginning.')
            if have_optional:
                raise Sorry('"optional" can appear only once.')
            have_optional = True
        elif lword == "not":
            assert len(result_stack) >= 1
            arg = result_stack.pop()
            result_stack.append(~arg)
        elif lword in ["and", "or"]:
            assert len(result_stack) >= 2
            # Postfix order: the right operand is on top of the stack.
            rhs = result_stack.pop()
            lhs = result_stack.pop()
            if lword == "and":
                result_stack.append(lhs & rhs)
            else:
                result_stack.append(lhs | rhs)
        else:
            if lword == "all":
                result_stack.append(flex.bool(self.n_seq, True))
            elif lword == "none":
                result_stack.append(flex.bool(self.n_seq, False))
            elif lword == "name":
                result_stack.append(
                    self.sel_name(
                        pattern=word_iterator.pop_argument(word.value)))
            elif lword in ["altloc", "altid"]:
                result_stack.append(
                    self.sel_altloc(
                        pattern=word_iterator.pop_argument(word.value)))
            elif lword == "resname":
                result_stack.append(
                    self.sel_resname(
                        pattern=word_iterator.pop_argument(word.value)))
            elif lword == "chain":
                result_stack.append(
                    self.sel_chain_id(
                        pattern=word_iterator.pop_argument(word.value)))
            elif lword in ["resseq", "resid", "resi", "model"]:
                arg = word_iterator.pop_argument(word.value)

                def try_compose_range():
                    # Reassemble "start:stop" when the tokenizer split it
                    # into several words.  Returns (text, colon index);
                    # the index is -1 when the argument is not a range.
                    def is_cont():
                        # True if the follow-up word starts with a digit.
                        if len(arg_cont.value) == 0:
                            return False
                        return "0123456789".find(arg_cont.value[0]) >= 0

                    i_colon = arg.value.find(":")
                    if i_colon < 0:
                        arg_cont = word_iterator.try_pop()
                        if arg_cont is None:
                            return arg.value, -1
                        if not arg_cont.value.startswith(":"):
                            word_iterator.backup()
                            return arg.value, -1
                        if len(arg_cont.value) == 1:
                            # A lone ":"; the stop value may follow.
                            arg_cont = word_iterator.try_pop()
                            if arg_cont is None:
                                return arg.value + ":", len(arg.value)
                            if not is_cont():
                                word_iterator.backup()
                                return arg.value + ":", len(arg.value)
                            return (arg.value + ":" + arg_cont.value,
                                    len(arg.value))
                        return arg.value + arg_cont.value, len(arg.value)
                    elif i_colon + 1 == len(arg.value):
                        # Argument ends with ":"; try to attach the stop.
                        arg_cont = word_iterator.try_pop()
                        if arg_cont is not None:
                            if is_cont():
                                return arg.value + arg_cont.value, i_colon
                            word_iterator.backup()
                    return arg.value, i_colon

                def try_compose_sequence():
                    # Recognise "<start> through <stop>"; (None, None)
                    # when the next word is not "through".
                    arg_next = word_iterator.try_pop()
                    if arg_next is None:
                        word_iterator.backup()
                        return None, None
                    lnext = arg_next.value.lower()
                    if lnext == "through":
                        arg_final = word_iterator.pop_argument(
                            arg_next.value)
                        return arg.value, arg_final.value
                    word_iterator.backup()
                    return (None, None)

                val, i_colon = try_compose_range()
                if i_colon < 0:
                    if lword == "resseq":
                        result_stack.append(self.sel_resseq(pattern=arg))
                    elif lword in ["resid", "resi"]:
                        start, stop = try_compose_sequence()
                        if start is None:
                            result_stack.append(
                                self.sel_resid(pattern=arg))
                        else:
                            result_stack.append(
                                self.sel_resid_sequence(
                                    start=start, stop=stop))
                    else:
                        result_stack.append(self.sel_model_id(pattern=arg))
                else:
                    start = val[:i_colon]
                    stop = val[i_colon + 1:]
                    if lword == "resseq":
                        result_stack.append(
                            self.sel_resseq_range(start=start, stop=stop))
                    elif lword in ["resid", "resi"]:
                        result_stack.append(
                            self.sel_resid_range(start=start, stop=stop))
                    else:
                        result_stack.append(
                            self.sel_model_id_range(start=start, stop=stop))
            elif lword == "icode":
                result_stack.append(
                    self.sel_icode(
                        pattern=word_iterator.pop_argument(word.value)))
            elif lword == "segid":
                result_stack.append(
                    self.sel_segid(
                        pattern=word_iterator.pop_argument(word.value)))
            elif lword == "element":
                result_stack.append(
                    self.sel_element(
                        pattern=word_iterator.pop_argument(word.value)))
            elif lword == "charge":
                result_stack.append(
                    self.sel_charge(
                        pattern=word_iterator.pop_argument(word.value)))
            elif lword == "anisou":
                result_stack.append(self.sel_anisou())
            elif lword == "pepnames":
                result_stack.append(self.sel_pepnames())
            elif ((lword == "protein" or lword == "peptide")
                  and callback is None):
                # If there is a callback, these keywords should be
                # processed there; most likely it is pdb_interpretation.
                result_stack.append(self.sel_protein())
            elif lword == "nucleotide" and callback is None:
                result_stack.append(self.sel_nucleotide())
            elif lword == "single_atom_residue":
                result_stack.append(self.sel_single_atom_residue())
            elif lword == "water":
                result_stack.append(self.sel_water())
            elif (lword == "hetero") or (lword == "hetatm"):
                result_stack.append(self.sel_hetero())
            elif (lword == "bfactor") or (lword == "occupancy"):
                op = word_iterator.pop_argument(word.value).value
                if op not in [">", "<", "="]:
                    raise_syntax_error()
                else:
                    arg_next = word_iterator.try_pop()
                    if arg_next is None:
                        # Input ended after the operator: report it as a
                        # syntax error rather than failing on None.value.
                        raise_syntax_error()
                    lnext = arg_next.value
                    try:
                        val = float(lnext)
                    except ValueError:
                        raise_syntax_error()
                    else:
                        if lword == "bfactor":
                            result_stack.append(self.sel_bfactor(op, val))
                        else:
                            result_stack.append(
                                self.sel_occupancy(op, val))
            elif ((lword == "within")
                  and (self.special_position_settings is not None)):
                # within ( <radius> , <selection> )
                pop_expected(word_iterator, "(")
                radius = float(word_iterator.pop().value)
                pop_expected(word_iterator, ",")
                sel = self.selection_parser(
                    word_iterator=word_iterator,
                    callback=callback,
                    expect_nonmatching_closing_parenthesis=True)
                result_stack.append(
                    self.sel_within(radius=radius, primary_selection=sel))
            elif callback is not None:
                if not callback(
                        word=word,
                        word_iterator=word_iterator,
                        result_stack=result_stack):
                    raise_syntax_error()
            else:
                raise_syntax_error()
    # With optional=True the "optional" keyword is accepted but ignored.
    if optional:
        have_optional = False
    if len(result_stack) == 0:
        if have_optional:
            return None
        return flex.bool(self.n_seq, False)
    # Selections given side by side are implicitly and-ed (in place on
    # the first stack entry).
    selection = result_stack[0]
    for result in result_stack[1:]:
        selection &= result
    if have_optional and selection.all_eq(False):
        return None
    return selection