Exemple #1
0
 def test_get_copy_fields(self):
     """
     The reindexer's copy-field list must equal the ``dest`` of every
     copyField entry the schema API returns for the source collection.
     """
     source_coll = self.colls[0]
     reindexer = Reindexer(source=self.solr, source_coll=source_coll, dest=self.solr, dest_coll='doesntmatter')
     actual = reindexer._get_copy_fields()
     expected = []
     for copy_field in self.solr.schema.get_schema_copyfields(source_coll):
         expected.append(copy_field['dest'])
     self.assertEqual(actual, expected)
Exemple #2
0
 def test_query_gen(self):
     """
     Checks the query parameters ``_get_query`` builds for a cursor mark
     when the reindexer is created without a date field.
     """
     reindexer = Reindexer(source=self.solr, source_coll=self.colls[0], dest=self.solr, dest_coll='doesntmatter')
     generated = reindexer._get_query('cursor')
     expected = dict(cursorMark='cursor', rows=reindexer._rows, q='*:*', sort='id desc')
     self.assertEqual(generated, expected)
 def test_get_date_range_query(self):
     """
     Exercises ``_get_date_range_query`` with and without the optional
     ``date_field`` / ``timespan`` arguments, all within one test.
     """
     solr = SolrClient(test_config["SOLR_SERVER"][0], devel=True, auth=test_config["SOLR_CREDENTIALS"])
     reindexer = Reindexer(
         source=solr, source_coll="source_coll", dest=solr, dest_coll="dest_coll", date_field="index_date"
     )
     start, end = "2015-11-10", "2015-12-11"

     def expected(field, gap):
         # Every case shares these parameters; only the faceted field and the gap vary.
         return {
             "q": "*:*",
             "rows": 0,
             "facet": "true",
             "facet.range": field,
             "facet.range.start": start,
             "facet.range.end": end,
             "facet.range.gap": gap,
             "facet.range.include": "all",
         }

     # (extra keyword arguments, expected facet field, expected gap)
     cases = [
         ({}, "index_date", "+1DAY"),
         ({"date_field": "date123"}, "date123", "+1DAY"),
         ({"date_field": "date123", "timespan": "MONTH"}, "date123", "+1MONTH"),
         ({"timespan": "MONTH"}, "index_date", "+1MONTH"),
     ]
     for extra_kwargs, field, gap in cases:
         generated = reindexer._get_date_range_query(start, end, **extra_kwargs)
         self.assertEqual(generated, expected(field, gap))
 def test_solr_to_solr_reindex_and_resume_reverse(self):
     """
     Reindexes only part of the source (restricted by a date filter query),
     then calls resume() and checks the destination catches up with the source.
     """
     self._index_docs(50000, self.colls[0])
     solr = SolrClient(test_config["SOLR_SERVER"][0], auth=test_config["SOLR_CREDENTIALS"])
     reindexer = Reindexer(
         source=solr, source_coll="source_coll", dest=solr, dest_coll="dest_coll", date_field="date"
     )

     def doc_count(coll):
         # A fresh params dict per request; rows is far above the 50000 docs indexed here.
         return len(solr.query(coll, {"q": "*:*", "rows": 10000000}).docs)

     # Only the source should hold documents before anything is reindexed.
     self.assertEqual(doc_count(self.colls[0]), 50000)
     self.assertEqual(doc_count(self.colls[1]), 0)
     # Rough midpoint: "now" minus half the span between self._start_date and self._end_date.
     half_span_days = (self._end_date - self._start_date).days / 2
     midpoint = datetime.datetime.now() - datetime.timedelta(days=half_span_days)
     # The filter query limits the first pass to documents dated at or after the midpoint.
     reindexer.reindex(fq=["date:[{} TO *]".format(midpoint.isoformat() + "Z")])
     sleep(10)
     # The destination must hold more than 20% of the source, but not all of it.
     dest_count = doc_count(self.colls[1])
     s_count = doc_count(self.colls[0])
     self.assertTrue(s_count > dest_count > s_count * 0.20)
     reindexer.resume()
     sleep(10)
     # After resuming, both collections must contain the same number of documents.
     source_total = doc_count(self.colls[0])
     dest_total = doc_count(self.colls[1])
     self.assertEqual(source_total, dest_total)
Exemple #5
0
 def test_solr_to_solr_with_date(self):
     """
     Reindexes source_coll into dest_coll with a date field configured and
     verifies both the sort order sent to Solr and the copied documents.

     The earlier version compared the return values of ``list.sort()``, which
     are always ``None``, so the document comparison could never fail. The
     documents are now ordered with ``sorted()`` after passing through
     ``_trim_fields``.
     NOTE(review): _trim_fields presumably strips the reindexer's ignored
     fields (e.g. _version_), which would differ between collections - confirm.
     """
     self._index_docs(50000, self.colls[0])
     solr = SolrClient(test_config['SOLR_SERVER'][0],
                       devel=True,
                       auth=test_config['SOLR_CREDENTIALS'])
     reindexer = Reindexer(source=solr,
                           source_coll='source_coll',
                           dest=solr,
                           dest_coll='dest_coll',
                           date_field='index_date')
     reindexer.reindex()
     # The logged request carrying the sort parameter is expected at index 1;
     # fall back to index 2 when that entry has no such key.
     try:
         logged_sort = solr.transport._action_log[1]['params']['params']['sort']
     except KeyError:
         logged_sort = solr.transport._action_log[2]['params']['params']['sort']
     self.assertEqual(logged_sort, 'index_date asc, id desc')

     def comparable_docs(coll):
         docs = solr.query(coll, {'q': '*:*', 'rows': 10000000}).docs
         return sorted(reindexer._trim_fields(docs), key=lambda doc: doc['id'])

     self.assertEqual(comparable_docs(self.colls[0]), comparable_docs(self.colls[1]))
 def test_solr_to_solr(self):
     """
     Reindexes source_coll into dest_coll and checks that both collections
     return the same documents.

     The earlier version compared the return values of ``list.sort()``, which
     are always ``None``, so the assertion could never fail; it also used the
     deprecated ``assertEquals`` alias. The documents are now ordered with
     ``sorted()`` after passing through ``_trim_fields``.
     NOTE(review): _trim_fields presumably strips the reindexer's ignored
     fields (e.g. _version_), which would differ between collections - confirm.
     """
     self._index_docs(50000, self.colls[0])
     reindexer = Reindexer(source=self.solr, source_coll="source_coll", dest=self.solr, dest_coll="dest_coll")
     reindexer.reindex()

     def comparable_docs(coll):
         docs = self.solr.query(coll, {"q": "*:*", "rows": 10000000}).docs
         return sorted(reindexer._trim_fields(docs), key=lambda doc: doc["id"])

     self.assertEqual(comparable_docs(self.colls[0]), comparable_docs(self.colls[1]))
Exemple #7
0
 def test_solr_to_solr(self):
     """
     Reindexes source_coll into dest_coll and checks that both collections
     return the same documents.

     The earlier version compared the return values of ``list.sort()``, which
     are always ``None``, so the assertion could never fail. The documents are
     now ordered with ``sorted()`` after passing through ``_trim_fields``.
     NOTE(review): _trim_fields presumably strips the reindexer's ignored
     fields (e.g. _version_), which would differ between collections - confirm.
     """
     self._index_docs(50000, self.colls[0])
     reindexer = Reindexer(source=self.solr, source_coll='source_coll', dest=self.solr, dest_coll='dest_coll')
     reindexer.reindex()

     def comparable_docs(coll):
         docs = self.solr.query(coll, {'q': '*:*', 'rows': 10000000}).docs
         return sorted(reindexer._trim_fields(docs), key=lambda doc: doc['id'])

     self.assertEqual(comparable_docs(self.colls[0]), comparable_docs(self.colls[1]))
Exemple #8
0
 def test_query_gen_pershard_distrib(self):
     """
     With per_shard=True the generated query must carry ``distrib`` set to 'false'.
     """
     reindexer = Reindexer(source=self.solr, source_coll=self.colls[0], dest=self.solr, dest_coll='doesntmatter',
                           per_shard=True)
     query = reindexer._get_query('cursor')
     distrib_disabled = 'distrib' in query and query['distrib'] == 'false'
     self.assertTrue(distrib_disabled)
 def test_query_gen(self):
     """
     Verifies the parameters ``_get_query`` produces for a cursor mark when no
     date field was given to the reindexer.
     """
     reindexer = Reindexer(source=self.solr, source_coll=self.colls[0], dest=self.solr, dest_coll="doesntmatter")
     cursor = "cursor"
     produced = reindexer._get_query(cursor)
     wanted = {"q": "*:*", "sort": "id desc", "rows": reindexer._rows, "cursorMark": cursor}
     self.assertEqual(produced, wanted)
Exemple #10
0
 def test_get_copy_fields(self):
     """
     Compares ``_get_copy_fields`` against the destination names of the
     copyFields that the schema API lists for the source collection.
     """
     coll = self.colls[0]
     reindexer = Reindexer(source=self.solr, source_coll=coll, dest=self.solr, dest_coll="doesntmatter")
     reported = reindexer._get_copy_fields()
     schema_copyfields = self.solr.schema.get_schema_copyfields(coll)
     self.assertEqual(reported, [entry["dest"] for entry in schema_copyfields])
Exemple #11
0
 def test_get_date_facet_counts_not_day(self):
     '''
     ``_get_date_facet_counts`` must raise ValueError when asked for the 'MONTH' timespan.
     '''
     self._index_docs(50000, self.colls[0])
     solr = SolrClient(test_config['SOLR_SERVER'][0], devel=True, auth=test_config['SOLR_CREDENTIALS'])
     reindexer = Reindexer(source=solr, source_coll='source_coll', dest=solr, dest_coll='dest_coll',
                           date_field='date')
     # The result is deliberately not unpacked: unpacking a return value of the
     # wrong length raises ValueError too and would satisfy assertRaises by accident.
     with self.assertRaises(ValueError):
         reindexer._get_date_facet_counts('MONTH', 'date')
Exemple #12
0
 def test_remove_copy_fields_from_data(self):
     """
     Exports source_coll into an IndexQ and checks that none of the
     reindexer's ignored fields appear in the exported documents.
     """
     index = IndexQ(test_config["indexqbase"], "test_reindexer", size=0)
     # Delete whatever files the todo/done directories currently list so that
     # only this run's output is inspected.
     for queue_dir in ["_todo_dir", "_done_dir"]:
         for path in index.get_all_as_list(dir=queue_dir):
             os.remove(path)
     reindexer = Reindexer(source=self.solr, source_coll="source_coll", dest=index)
     reindexer.reindex()
     from_files = self.get_all_json_from_indexq(index)
     excluded_fields = reindexer._ignore_fields
     for doc in from_files:
         for field in excluded_fields:
             # Previously an offending document was only printed, so the test
             # could never fail; assert instead.
             self.assertNotIn(field, doc)
Exemple #13
0
 def test_remove_copy_fields_from_data(self):
     '''
     Exports source_coll into an IndexQ and checks that none of the
     reindexer's ignored fields appear in the exported documents.
     '''
     index = IndexQ(test_config['indexqbase'], 'test_reindexer', size=0)
     # Delete whatever files the todo/done directories currently list so that
     # only this run's output is inspected.
     for queue_dir in ['_todo_dir', '_done_dir']:
         for path in index.get_all_as_list(dir=queue_dir):
             os.remove(path)
     reindexer = Reindexer(source=self.solr, source_coll='source_coll', dest=index)
     reindexer.reindex()
     from_files = self.get_all_json_from_indexq(index)
     excluded_fields = reindexer._ignore_fields
     for doc in from_files:
         for field in excluded_fields:
             # Previously an offending document was only printed, so the test
             # could never fail; assert instead.
             self.assertNotIn(field, doc)
Exemple #14
0
 def test_get_date_facet_counts_not_day(self):
     """
     ``_get_date_facet_counts`` must raise ValueError when asked for the "MONTH" timespan.
     """
     self._index_docs(50000, self.colls[0])
     solr = SolrClient(test_config["SOLR_SERVER"][0], devel=True, auth=test_config["SOLR_CREDENTIALS"])
     reindexer = Reindexer(
         source=solr, source_coll="source_coll", dest=solr, dest_coll="dest_coll", date_field="date"
     )
     # The result is deliberately not unpacked: unpacking a return value of the
     # wrong length raises ValueError too and would satisfy assertRaises by accident.
     with self.assertRaises(ValueError):
         reindexer._get_date_facet_counts("MONTH", "date")
Exemple #15
0
 def test_get_edge_date(self):
     '''
     Checks to make sure _get_edge_date returns correct start and end dates.

     NOTE(review): assumes the fixture indexes at least one document on the
     calendar day of both self._start_date and self._end_date, and that those
     datetimes are in the same timezone as the values Solr returns - confirm
     against _index_docs.
     '''
     self._index_docs(50000, self.colls[0])
     solr = SolrClient(test_config['SOLR_SERVER'][0], devel=True, auth=test_config['SOLR_CREDENTIALS'])
     reindexer = Reindexer(source=solr, source_coll='source_coll', dest=solr, dest_coll='dest_coll',
                           date_field='index_date')
     solr_end_date_string = reindexer._get_edge_date('date', 'desc')
     solr_start_date_string = reindexer._get_edge_date('date', 'asc')
     solr_date_format = '%Y-%m-%dT%H:%M:%S.%fZ'
     # assertTrue(a, b) treats b as the failure message and only checks that a is
     # truthy, so nothing was compared before. Compare the calendar dates explicitly.
     self.assertEqual(self._start_date.date(),
                      datetime.datetime.strptime(solr_start_date_string, solr_date_format).date())
     self.assertEqual(self._end_date.date(),
                      datetime.datetime.strptime(solr_end_date_string, solr_date_format).date())
Exemple #16
0
 def test_solr_to_indexq(self):
     """
     Exports documents from Solr into an IndexQ and compares the queued
     documents with what Solr returns for the same collection.
     """
     index = IndexQ(test_config["indexqbase"], "test_reindexer", size=0)
     # Delete whatever files the todo/done directories currently list.
     for queue_dir in ("_todo_dir", "_done_dir"):
         for leftover in index.get_all_as_list(dir=queue_dir):
             os.remove(leftover)
     self._index_docs(5000, self.colls[0])
     reindexer = Reindexer(source=self.solr, source_coll="source_coll", dest=index)
     reindexer.reindex()
     exported = self.get_all_json_from_indexq(index)
     solr_docs = reindexer._trim_fields(self.solr.query("source_coll", {"q": "*:*", "rows": 5000}).docs)

     def by_id(doc):
         return doc["id"]

     # Order both sides by id so the comparison does not depend on retrieval order.
     self.assertEqual(sorted(exported, key=by_id), sorted(solr_docs, key=by_id))
Exemple #17
0
 def test_solr_to_indexq(self):
     '''
     Reindexes from Solr into an IndexQ, then checks the queued documents
     against a direct query of the source collection.
     '''
     index = IndexQ(test_config['indexqbase'], 'test_reindexer', size=0)
     # Remove every file the todo/done directories currently list.
     for directory_attr in ('_todo_dir', '_done_dir'):
         for stale_file in index.get_all_as_list(dir=directory_attr):
             os.remove(stale_file)
     self._index_docs(5000, self.colls[0])
     reindexer = Reindexer(source=self.solr, source_coll='source_coll', dest=index)
     reindexer.reindex()
     queued_docs = self.get_all_json_from_indexq(index)
     response = self.solr.query('source_coll', {'q': '*:*', 'rows': 5000})
     solr_docs = reindexer._trim_fields(response.docs)
     # Compare both lists in id order.
     queued_sorted = sorted(queued_docs, key=lambda doc: doc['id'])
     solr_sorted = sorted(solr_docs, key=lambda doc: doc['id'])
     self.assertEqual(queued_sorted, solr_sorted)
Exemple #18
0
 def test_get_date_facet_counts_without_start_date(self):
     '''
     Requests daily facet counts without passing a start date and compares every
     source bucket with the per-day counts held in self.date_counts.
     '''
     self._index_docs(50000, self.colls[0])
     solr = SolrClient(test_config['SOLR_SERVER'][0], devel=True, auth=test_config['SOLR_CREDENTIALS'])
     reindexer = Reindexer(source=solr, source_coll='source_coll', dest=solr, dest_coll='dest_coll',
                           date_field='date')
     source_facet, dest_facet = reindexer._get_date_facet_counts('DAY', 'date')
     for dt_range in source_facet:
         # Bucket keys are timestamps; self.date_counts is looked up by ISO calendar date.
         day = datetime.datetime.strptime(dt_range, '%Y-%m-%dT%H:%M:%SZ').date().isoformat()
         found = source_facet[dt_range]
         expected = self.date_counts[day]
         if found != expected:
             # Log the mismatch details before the assertion below fails.
             logging.info("{} - {} - {}".format(day, found, expected))
         self.assertEqual(found, expected)
Exemple #19
0
 def test_solr_to_solr_with_date(self):
     """
     Reindexes source_coll into dest_coll with a date field configured and
     verifies both the sort order sent to Solr and the copied documents.

     The earlier version compared the return values of ``list.sort()``, which
     are always ``None``, so the document comparison could never fail. The
     documents are now ordered with ``sorted()`` after passing through
     ``_trim_fields``.
     NOTE(review): _trim_fields presumably strips the reindexer's ignored
     fields (e.g. _version_), which would differ between collections - confirm.
     """
     self._index_docs(50000, self.colls[0])
     solr = SolrClient(test_config["SOLR_SERVER"][0], devel=True, auth=test_config["SOLR_CREDENTIALS"])
     reindexer = Reindexer(
         source=solr, source_coll="source_coll", dest=solr, dest_coll="dest_coll", date_field="index_date"
     )
     reindexer.reindex()
     # The logged request carrying the sort parameter is expected at index 1;
     # fall back to index 2 when that entry has no such key.
     try:
         logged_sort = solr.transport._action_log[1]["params"]["params"]["sort"]
     except KeyError:
         logged_sort = solr.transport._action_log[2]["params"]["params"]["sort"]
     self.assertEqual(logged_sort, "index_date asc, id desc")

     def comparable_docs(coll):
         docs = solr.query(coll, {"q": "*:*", "rows": 10000000}).docs
         return sorted(reindexer._trim_fields(docs), key=lambda doc: doc["id"])

     self.assertEqual(comparable_docs(self.colls[0]), comparable_docs(self.colls[1]))
Exemple #20
0
 def test_solr_to_solr_resume_checkonly(self):
     '''
     resume(check=True) must not index anything: the document counts of both
     collections are the same before and after the call.
     '''
     self._index_docs(50000, self.colls[0])
     solr = SolrClient(test_config['SOLR_SERVER'][0], devel=True, auth=test_config['SOLR_CREDENTIALS'])
     reindexer = Reindexer(source=solr, source_coll='source_coll', dest=solr, dest_coll='dest_coll',
                           date_field='date')

     def doc_count(coll):
         # A fresh params dict for every request.
         return len(solr.query(coll, {'q': '*:*', 'rows': 10000000}).docs)

     # Before: only the source holds data.
     self.assertEqual(doc_count(self.colls[0]), 50000)
     self.assertEqual(doc_count(self.colls[1]), 0)
     reindexer.resume(check=True)
     # After: counts are unchanged, so nothing was indexed.
     self.assertEqual(doc_count(self.colls[0]), 50000)
     self.assertEqual(doc_count(self.colls[1]), 0)
Exemple #21
0
 def test_solr_to_solr_reindexer_per_shard(self):
     """
     Runs a reindex with per_shard=True and checks that the destination ends
     up with as many documents as the source.
     """
     self._index_docs(50000, self.colls[0])
     solr = SolrClient(test_config["SOLR_SERVER"][0], auth=test_config["SOLR_CREDENTIALS"])
     reindexer = Reindexer(
         source=solr, source_coll="source_coll", dest=solr, dest_coll="dest_coll", per_shard=True, date_field="date"
     )

     def doc_count(coll):
         params = {"q": "*:*", "rows": 10000000}
         return len(solr.query(coll, params).docs)

     # Only the source should contain documents at this point.
     self.assertEqual(doc_count(self.colls[0]), 50000)
     self.assertEqual(doc_count(self.colls[1]), 0)
     reindexer.reindex()
     # Count-only comparison; document contents are not checked.
     source_total = doc_count(self.colls[0])
     dest_total = doc_count(self.colls[1])
     self.assertEqual(source_total, dest_total)
Exemple #22
0
 def test_solr_to_solr_resume_checkonly(self):
     """
     Calls resume(check=True) and verifies it leaves both collections as they
     were: 50000 documents in the source and none in the destination.
     """
     self._index_docs(50000, self.colls[0])
     solr = SolrClient(test_config["SOLR_SERVER"][0], devel=True, auth=test_config["SOLR_CREDENTIALS"])
     reindexer = Reindexer(
         source=solr, source_coll="source_coll", dest=solr, dest_coll="dest_coll", date_field="date"
     )

     def assert_counts(source_expected, dest_expected):
         # Source is queried first, then destination, each with its own params dict.
         source_docs = solr.query(self.colls[0], {"q": "*:*", "rows": 10000000}).docs
         self.assertEqual(len(source_docs), source_expected)
         dest_docs = solr.query(self.colls[1], {"q": "*:*", "rows": 10000000}).docs
         self.assertEqual(len(dest_docs), dest_expected)

     # Only the source has data to begin with.
     assert_counts(50000, 0)
     reindexer.resume(check=True)
     # The check-only run must not have indexed anything.
     assert_counts(50000, 0)
Exemple #23
0
 def test_ignore_fields_disable(self):
     """
     Passing ignore_fields=False must leave ``_ignore_fields`` equal to False.
     """
     index = IndexQ(test_config['indexqbase'], 'test_reindexer', size=0)
     reindexer = Reindexer(
         source=self.solr,
         source_coll='source_coll',
         dest=index,
         ignore_fields=False,
     )
     configured = reindexer._ignore_fields
     self.assertEqual(configured, False)
Exemple #24
0
 def test_get_date_facet_counts_without_start_date(self):
     """
     Fetches daily facet counts with no explicit start date and checks each
     source bucket against the matching entry of self.date_counts.
     """
     self._index_docs(50000, self.colls[0])
     solr = SolrClient(test_config["SOLR_SERVER"][0], devel=True, auth=test_config["SOLR_CREDENTIALS"])
     reindexer = Reindexer(
         source=solr, source_coll="source_coll", dest=solr, dest_coll="dest_coll", date_field="date"
     )
     source_facet, dest_facet = reindexer._get_date_facet_counts("DAY", "date")
     bucket_format = "%Y-%m-%dT%H:%M:%SZ"
     for dt_range in source_facet:
         # Reduce the bucket timestamp to the ISO date used to look up self.date_counts.
         iso_day = datetime.datetime.strptime(dt_range, bucket_format).date().isoformat()
         facet_count = source_facet[dt_range]
         indexed_count = self.date_counts[iso_day]
         if facet_count != indexed_count:
             # Record the mismatch details before the assertion below fails.
             logging.info("{} - {} - {}".format(iso_day, facet_count, indexed_count))
         self.assertEqual(facet_count, indexed_count)
Exemple #25
0
 def test_get_date_facet_counts(self):
     '''
     Requests daily facet counts starting at self._start_date and makes sure
     every source bucket matches the per-day counts in self.date_counts.
     '''
     self._index_docs(50000, self.colls[0])
     solr = SolrClient(test_config['SOLR_SERVER'][0], devel=True, auth=test_config['SOLR_CREDENTIALS'])
     reindexer = Reindexer(source=solr, source_coll='source_coll', dest=solr, dest_coll='dest_coll',
                           date_field='date')
     first_day = self._start_date.date().isoformat()
     source_facet, dest_facet = reindexer._get_date_facet_counts('DAY', 'date', start_date=first_day)
     for dt_range in source_facet:
         # Bucket keys are timestamps; self.date_counts is looked up by ISO calendar date.
         day = datetime.datetime.strptime(dt_range, '%Y-%m-%dT%H:%M:%SZ').date().isoformat()
         found = source_facet[dt_range]
         expected = self.date_counts[day]
         if found != expected:
             # Log the mismatch details before the assertion below fails.
             logging.info("{} - {} - {}".format(day, found, expected))
         self.assertEqual(found, expected)
Exemple #26
0
 def test_get_edge_date(self):
     """
     Checks to make sure _get_edge_date returns correct start and end dates.

     NOTE(review): assumes the fixture indexes at least one document on the
     calendar day of both self._start_date and self._end_date, and that those
     datetimes are in the same timezone as the values Solr returns - confirm
     against _index_docs.
     """
     self._index_docs(50000, self.colls[0])
     solr = SolrClient(test_config["SOLR_SERVER"][0], devel=True, auth=test_config["SOLR_CREDENTIALS"])
     reindexer = Reindexer(
         source=solr, source_coll="source_coll", dest=solr, dest_coll="dest_coll", date_field="index_date"
     )
     solr_end_date_string = reindexer._get_edge_date("date", "desc")
     solr_start_date_string = reindexer._get_edge_date("date", "asc")
     solr_date_format = "%Y-%m-%dT%H:%M:%S.%fZ"
     # assertTrue(a, b) treats b as the failure message and only checks that a is
     # truthy, so nothing was compared before. Compare the calendar dates explicitly.
     self.assertEqual(
         self._start_date.date(), datetime.datetime.strptime(solr_start_date_string, solr_date_format).date()
     )
     self.assertEqual(
         self._end_date.date(), datetime.datetime.strptime(solr_end_date_string, solr_date_format).date()
     )
Exemple #27
0
 def test_ignore_fields_override(self):
     """
     An explicit ignore_fields list must be stored on the reindexer as given.
     """
     index = IndexQ(test_config['indexqbase'], 'test_reindexer', size=0)
     reindexer = Reindexer(
         source=self.solr,
         source_coll='source_coll',
         dest=index,
         ignore_fields=['_text_', '_any_other_field'],
     )
     stored = reindexer._ignore_fields
     self.assertEqual(stored, ['_text_', '_any_other_field'])
Exemple #28
0
 def test_get_date_facet_counts(self):
     """
     Fetches daily facet counts from self._start_date onwards and checks each
     source bucket against the matching entry of self.date_counts.
     """
     self._index_docs(50000, self.colls[0])
     solr = SolrClient(test_config["SOLR_SERVER"][0], devel=True, auth=test_config["SOLR_CREDENTIALS"])
     reindexer = Reindexer(
         source=solr, source_coll="source_coll", dest=solr, dest_coll="dest_coll", date_field="date"
     )
     start_day = self._start_date.date().isoformat()
     source_facet, dest_facet = reindexer._get_date_facet_counts("DAY", "date", start_date=start_day)
     bucket_format = "%Y-%m-%dT%H:%M:%SZ"
     for dt_range in source_facet:
         # Reduce the bucket timestamp to the ISO date used to look up self.date_counts.
         iso_day = datetime.datetime.strptime(dt_range, bucket_format).date().isoformat()
         facet_count = source_facet[dt_range]
         indexed_count = self.date_counts[iso_day]
         if facet_count != indexed_count:
             # Record the mismatch details before the assertion below fails.
             logging.info("{} - {} - {}".format(iso_day, facet_count, indexed_count))
         self.assertEqual(facet_count, indexed_count)
Exemple #29
0
 def test_ignore_fields(self):
     """
     With no ignore_fields argument, the reindexer's ``_ignore_fields`` must
     contain both '_version_' and 'product_name_exact'.
     """
     index = IndexQ(test_config['indexqbase'], 'test_reindexer', size=0)
     # Remove every file the todo/done directories currently list.
     for queue_dir in ('_todo_dir', '_done_dir'):
         for leftover in index.get_all_as_list(dir=queue_dir):
             os.remove(leftover)
     reindexer = Reindexer(source=self.solr, source_coll='source_coll', dest=index)
     for required_field in ('_version_', 'product_name_exact'):
         is_ignored = required_field in reindexer._ignore_fields
         self.assertTrue(is_ignored)
Exemple #30
0
 def test_solr_to_solr_resume_basic(self):
     """
     Calls resume() while the destination is still empty and checks that the
     destination ends up with as many documents as the source.
     """
     self._index_docs(50000, self.colls[0])
     solr = SolrClient(test_config["SOLR_SERVER"][0], auth=test_config["SOLR_CREDENTIALS"])
     reindexer = Reindexer(
         source=solr, source_coll="source_coll", dest=solr, dest_coll="dest_coll", date_field="date"
     )

     def doc_count(coll):
         # A fresh params dict for every request.
         return len(solr.query(coll, {"q": "*:*", "rows": 10000000}).docs)

     # Only the source has data before resuming.
     self.assertEqual(doc_count(self.colls[0]), 50000)
     self.assertEqual(doc_count(self.colls[1]), 0)
     reindexer.resume()
     sleep(10)
     # Counts must match once resume has run.
     source_total = doc_count(self.colls[0])
     dest_total = doc_count(self.colls[1])
     self.assertEqual(source_total, dest_total)
Exemple #31
0
 def test_get_date_range_query(self):
     '''
     Checks the query ``_get_date_range_query`` builds for the default arguments
     and for overridden ``date_field`` / ``timespan`` values, all in one test.
     '''
     solr = SolrClient(test_config['SOLR_SERVER'][0], devel=True, auth=test_config['SOLR_CREDENTIALS'])
     reindexer = Reindexer(source=solr, source_coll='source_coll', dest=solr, dest_coll='dest_coll',
                           date_field='index_date')
     # Parameters that are identical in every expected query below.
     common = {'rows': 0, 'facet': 'true', 'q': '*:*', 'facet.range.include': 'all',
               'facet.range.start': '2015-11-10', 'facet.range.end': '2015-12-11'}

     def expect(field, gap):
         merged = dict(common)
         merged['facet.range'] = field
         merged['facet.range.gap'] = gap
         return merged

     # No overrides: the constructor's date field and a one-day gap.
     generated = reindexer._get_date_range_query('2015-11-10', '2015-12-11')
     self.assertEqual(generated, expect('index_date', '+1DAY'))
     # Overridden date field only.
     generated = reindexer._get_date_range_query('2015-11-10', '2015-12-11', date_field='date123')
     self.assertEqual(generated, expect('date123', '+1DAY'))
     # Overridden date field and timespan.
     generated = reindexer._get_date_range_query('2015-11-10', '2015-12-11', date_field='date123',
                                                 timespan='MONTH')
     self.assertEqual(generated, expect('date123', '+1MONTH'))
     # Overridden timespan only.
     generated = reindexer._get_date_range_query('2015-11-10', '2015-12-11', timespan='MONTH')
     self.assertEqual(generated, expect('index_date', '+1MONTH'))
Exemple #32
0
 def test_solr_to_solr_reindex_and_resume_reverse(self):
     '''
     Reindexes part of the collection first (restricted by a date filter query),
     then resumes and checks that the destination matches the source count.
     '''
     self._index_docs(50000, self.colls[0])
     solr = SolrClient(test_config['SOLR_SERVER'][0], auth=test_config['SOLR_CREDENTIALS'])
     reindexer = Reindexer(source=solr, source_coll='source_coll', dest=solr, dest_coll='dest_coll',
                           date_field='date')

     def doc_count(coll):
         # A fresh params dict per request; rows is far above the 50000 docs indexed here.
         return len(solr.query(coll, {'q': '*:*', 'rows': 10000000}).docs)

     # Only the source should hold documents before anything is reindexed.
     self.assertEqual(doc_count(self.colls[0]), 50000)
     self.assertEqual(doc_count(self.colls[1]), 0)
     # Rough midpoint: "now" minus half the span between self._start_date and self._end_date.
     half_span_days = (self._end_date - self._start_date).days / 2
     midpoint = datetime.datetime.now() - datetime.timedelta(days=half_span_days)
     # The filter query limits the first pass to documents dated at or after the midpoint.
     reindexer.reindex(fq=['date:[{} TO *]'.format(midpoint.isoformat() + 'Z')])
     # The destination must hold more than 20% of the source, but not all of it.
     dest_count = doc_count(self.colls[1])
     s_count = doc_count(self.colls[0])
     self.assertTrue(s_count > dest_count > s_count * .20)
     reindexer.resume()
     # After resuming, both collections must contain the same number of documents.
     source_total = doc_count(self.colls[0])
     dest_total = doc_count(self.colls[1])
     self.assertEqual(source_total, dest_total)
Exemple #33
0
 def test_solr_to_solr_resume_basic(self):
     '''
     Runs resume() against an empty destination and verifies that afterwards
     both collections report the same number of documents.
     '''
     self._index_docs(50000, self.colls[0])
     solr = SolrClient(test_config['SOLR_SERVER'][0], auth=test_config['SOLR_CREDENTIALS'])
     reindexer = Reindexer(source=solr, source_coll='source_coll', dest=solr, dest_coll='dest_coll',
                           date_field='date')

     def doc_count(coll):
         match_all = {'q': '*:*', 'rows': 10000000}
         return len(solr.query(coll, match_all).docs)

     # Only the source has data before resuming.
     self.assertEqual(doc_count(self.colls[0]), 50000)
     self.assertEqual(doc_count(self.colls[1]), 0)
     reindexer.resume()
     sleep(10)
     # Counts must match once resume has run.
     self.assertEqual(doc_count(self.colls[0]), doc_count(self.colls[1]))
Exemple #34
0
    def test_solr_to_solr_reindexer_per_shard(self):
        """
        Reindexes from two shard replica sources into the destination collection
        with per_shard=True, then checks both collections hold the same number
        of documents.
        """
        self._index_docs(50000, self.colls[0])
        solr = SolrClient(test_config['SOLR_SERVER'][0], auth=test_config['SOLR_CREDENTIALS'])

        def doc_count(coll):
            # A fresh params dict for every request.
            return len(solr.query(coll, {'q': '*:*', 'rows': 10000000}).docs)

        # Only the source should contain documents at this point.
        self.assertEqual(doc_count(self.colls[0]), 50000)
        self.assertEqual(doc_count(self.colls[1]), 0)

        # Run one Reindexer for each of the two shard replica names, in order.
        for shard_source in ('source_coll_shard1_replica1', 'source_coll_shard2_replica1'):
            reindexer = Reindexer(source=solr, source_coll=shard_source, dest=solr,
                                  dest_coll=self.colls[1], per_shard=True, date_field='date')
            reindexer.reindex()

        self.solr.commit(self.colls[1], openSearcher=True)
        # Count-only comparison; document contents are not checked.
        source_total = doc_count(self.colls[0])
        dest_total = doc_count(self.colls[1])
        self.assertEqual(source_total, dest_total)
Exemple #35
0
# Scratch script for quick manual troubleshooting: builds an IndexQ, a SolrClient
# and a Reindexer, then opens an interactive console with them in scope.
import code
import time
import os
import sys
# Show the module search path before SolrClient is imported.
print(sys.path)
from SolrClient import SolrClient, Reindexer, IndexQ
import logging
logging.basicConfig(
    level=logging.INFO,
    format=
    '%(asctime)s [%(levelname)s] (%(process)d) (%(threadName)-10s) [%(name)s] %(message)s'
)

index = IndexQ('/tmp', 'test_indexq')
solr = SolrClient('http://localhost:8983/solr/')
r = Reindexer(source=solr, source_coll='source_coll', dest=index)
# Interactive console exposing the objects created above.
code.interact(local=locals())

# NOTE(review): the import and console call below repeat the ones above, so a
# second session opens after the first exits - looks like a leftover; confirm
# before removing.
import code
code.interact(local=locals())