# test.py
# Forked from ckreibich/scholar.py.
import optparse
import sys
import scholar as sc
# Citation format codes accepted by --citation, mapped to the NAME of the
# matching sc.ScholarSettings constant. Names rather than values are stored so
# that validating user input does not need to touch the scholar module.
_CITATION_FORMATS = {
    'bt': 'CITFORM_BIBTEX',
    'en': 'CITFORM_ENDNOTE',
    'rm': 'CITFORM_REFMAN',
    'rw': 'CITFORM_REFWORKS',
}


def _build_option_parser():
    """Return the optparse parser that describes the command-line interface."""
    usage = """demo.py [options] <query string>
A command-line interface to Google Scholar.
Examples:
# Retrieve one article written by Einstein on quantum theory:
demo.py -c 1 --author "albert einstein" --phrase "quantum theory"
# Retrieve a BibTeX entry for that quantum theory paper:
demo.py -c 1 -C 17749203648027613321 --citation bt
# Retrieve five articles written by Einstein after 1970 where the title
# does not contain the words "quantum" and "theory":
demo.py -c 5 -a "albert einstein" -t --none "quantum theory" --after 1970"""

    fmt = optparse.IndentedHelpFormatter(max_help_position=50, width=100)
    parser = optparse.OptionParser(usage=usage, formatter=fmt)

    group = optparse.OptionGroup(parser, 'Query arguments',
                                 'These options define search query arguments and parameters.')
    group.add_option('-a', '--author', metavar='AUTHORS', default=None,
                     help='Author name(s)')
    group.add_option('-A', '--all', metavar='WORDS', default=None, dest='allw',
                     help='Results must contain all of these words')
    group.add_option('-s', '--some', metavar='WORDS', default=None,
                     help='Results must contain at least one of these words. Pass arguments in form -s "foo bar baz" for simple words, and -s "a phrase, another phrase" for phrases')
    group.add_option('-n', '--none', metavar='WORDS', default=None,
                     help='Results must contain none of these words. See -s|--some re. formatting')
    group.add_option('-p', '--phrase', metavar='PHRASE', default=None,
                     help='Results must contain exact phrase')
    group.add_option('-t', '--title-only', action='store_true', default=False,
                     help='Search title only')
    group.add_option('-P', '--pub', metavar='PUBLICATIONS', default=None,
                     help='Results must have appeared in this publication')
    group.add_option('--after', metavar='YEAR', default=None,
                     help='Results must have appeared in or after given year')
    group.add_option('--before', metavar='YEAR', default=None,
                     help='Results must have appeared in or before given year')
    group.add_option('--no-patents', action='store_true', default=False,
                     help='Do not include patents in results')
    group.add_option('--no-citations', action='store_true', default=False,
                     help='Do not include citations in results')
    group.add_option('-C', '--cluster-id', metavar='CLUSTER_ID', default=None,
                     help='Do not search, just use articles in given cluster ID')
    group.add_option('-c', '--count', type='int', default=None,
                     help='Maximum number of results')
    parser.add_option_group(group)

    group = optparse.OptionGroup(parser, 'Output format',
                                 'These options control the appearance of the results.')
    group.add_option('--txt', action='store_true',
                     help='Print article data in text format (default)')
    group.add_option('--txt-globals', action='store_true',
                     help='Like --txt, but first print global results too')
    group.add_option('--csv', action='store_true',
                     help='Print article data in CSV form (separator is "|")')
    group.add_option('--csv-header', action='store_true',
                     help='Like --csv, but print header with column names')
    group.add_option('--citation', metavar='FORMAT', default=None,
                     help='Print article details in standard citation format. Argument Must be one of "bt" (BibTeX), "en" (EndNote), "rm" (RefMan), or "rw" (RefWorks).')
    parser.add_option_group(group)

    group = optparse.OptionGroup(parser, 'Miscellaneous')
    group.add_option('--cookie-file', metavar='FILE', default=None,
                     help='File to use for cookie storage. If given, will read any existing cookies if found at startup, and save resulting cookies in the end.')
    group.add_option('-d', '--debug', action='count', default=0,
                     help='Enable verbose logging to stderr. Repeated options increase detail of debug output.')
    group.add_option('-v', '--version', action='store_true', default=False,
                     help='Show version information')
    parser.add_option_group(group)

    return parser


def _build_query(options):
    """Translate the parsed command-line options into a scholar query object."""
    if options.cluster_id:
        query = sc.ClusterScholarQuery(cluster=options.cluster_id)
    else:
        # NOTE(review): the source this was recovered from had lost its
        # indentation; all search-specific setters (including the patent and
        # citation filters) are assumed to apply to search queries only --
        # confirm against the scholar module.
        query = sc.SearchScholarQuery()
        if options.author:
            query.set_author(options.author)
        if options.allw:
            query.set_words(options.allw)
        if options.some:
            query.set_words_some(options.some)
        if options.none:
            query.set_words_none(options.none)
        if options.phrase:
            query.set_phrase(options.phrase)
        if options.title_only:
            query.set_scope(True)
        if options.pub:
            query.set_pub(options.pub)
        if options.after or options.before:
            query.set_timeframe(options.after, options.before)
        if options.no_patents:
            query.set_include_patents(False)
        if options.no_citations:
            query.set_include_citations(False)

    if options.count is not None:
        # Cap the requested result count at the configured per-page maximum.
        options.count = min(options.count, sc.ScholarConf.MAX_PAGE_RESULTS)
        query.set_num_page_results(options.count)

    return query


def main():
    """Run the Google Scholar command-line interface.

    Parses ``sys.argv``, validates the options, sends a single query via the
    ``scholar`` module (imported as ``sc``) and prints the results in the
    requested output format.

    Returns:
        The process exit status: 0 on success (including ``--version``),
        1 when no arguments were given or the options are invalid.
    """
    parser = _build_option_parser()
    options, _ = parser.parse_args()

    # Show help when invoked without any command-line arguments at all.
    if len(sys.argv) == 1:
        parser.print_help()
        return 1

    if options.debug > 0:
        # Repeated -d flags raise verbosity, capped at the 'debug' level.
        options.debug = min(options.debug, sc.ScholarUtils.LOG_LEVELS['debug'])
        sc.ScholarConf.LOG_LEVEL = options.debug
        sc.ScholarUtils.log('info', 'using log level %d' % sc.ScholarConf.LOG_LEVEL)

    if options.version:
        print('This is demo.py %s.' % sc.ScholarConf.VERSION)
        return 0

    if options.cookie_file:
        sc.ScholarConf.COOKIE_JAR_FILE = options.cookie_file

    # Sanity-check the options: if they include a cluster ID query, it
    # makes no sense to have search arguments:
    if options.cluster_id is not None:
        search_arguments = (options.author, options.allw, options.some,
                            options.none, options.phrase, options.title_only,
                            options.pub, options.after, options.before)
        if any(search_arguments):
            print('Cluster ID queries do not allow additional search arguments.')
            return 1

    # Reject an unknown citation format before any querier/settings objects
    # are created, so invalid input fails fast without further side effects.
    if options.citation is not None and options.citation not in _CITATION_FORMATS:
        print('Invalid citation link format, must be one of "bt", "en", "rm", or "rw".')
        return 1

    querier = sc.ScholarQuerier()
    settings = sc.ScholarSettings()
    if options.citation is not None:
        citation_constant = getattr(sc.ScholarSettings,
                                    _CITATION_FORMATS[options.citation])
        settings.set_citation_format(citation_constant)
    querier.apply_settings(settings)

    querier.send_query(_build_query(options))

    # Output selection: --csv wins over --csv-header, which wins over
    # --citation; plain text is the fallback.
    if options.csv:
        sc.csv(querier)
    elif options.csv_header:
        sc.csv(querier, header=True)
    elif options.citation is not None:
        sc.citation_export(querier)
    else:
        sc.txt(querier, with_globals=options.txt_globals)

    if options.cookie_file:
        querier.save_cookies()

    return 0
# Script entry point: run the CLI and hand main()'s return value to the
# interpreter as the process exit status.
if __name__ == "__main__":
    exit_status = main()
    sys.exit(exit_status)